Merge pull request #2568 from google/benvanik-flatcc-vm
Refactoring bytecode_module to use flatcc instead of flatbuffers C++.
diff --git a/.github/workflows/google_to_main.yml b/.github/workflows/google_to_main.yml
deleted file mode 100644
index dd85178..0000000
--- a/.github/workflows/google_to_main.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Creates a PR to merge the `google` branch into the `main` branch.
-
-name: google -> main
-
-on:
- schedule:
- # Every 6 hours at 45 minutes past the hour (to be a bit behind the TF submodule update)
- # 00:45, 06:45, 12:45, 18:45 UTC (04:45, 10:45, 16:45, 22:45 PST)
- - cron: '45 */6 * * *'
-
-jobs:
- google_to_main:
- # Don't run this in everyone's forks.
- if: github.repository == 'google/iree'
- runs-on: ubuntu-18.04
- steps:
- - name: Checking out repository
- uses: actions/checkout@v2
- with:
- ref: "google"
- # We have to explicitly fetch the main branch as well
- - name: Fetching Base Branch
- run: git fetch --no-tags --prune --depth=1 origin main
- - name: Checking for a diff
- run: |
- echo "::set-env name=has_diff::false"
- git diff main --exit-code || echo "::set-env name=has_diff::true"
- - name: Calculating PR body
- if: env.has_diff == 'true'
- run: |
- echo "::set-env name=pr_body::$(git log main.. --decorate=no --pretty='format:* %h %<(80,trunc)%s')"
- - name: Initializing submodules
- if: env.has_diff == 'true'
- run: ./scripts/git/submodule_versions.py init
- - name: Creating Pull Request
- if: env.has_diff == 'true'
- uses: peter-evans/create-pull-request@v2
- with:
- # Personal token is required to trigger additional automation (e.g. presubmits).
- token: ${{ secrets.GITHUB_WRITE_ACCESS_TOKEN }}
- commit-message: "Merge google -> main"
- title: "Merge google -> main"
- body: "${{ env.pr_body }}"
- committer: "Integrate Action <iree-github-actions-bot@google.com>"
- # TODO(gcmn): Figure out a way to assign this to someone dynamically.
- reviewers: gmngeoffrey
- branch: "google-to-main"
- base: "main"
diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml
index 123f5a4..457da97 100644
--- a/.github/workflows/publish_docs.yml
+++ b/.github/workflows/publish_docs.yml
@@ -33,9 +33,6 @@
uses: actions/checkout@v2
with:
token: ${{ secrets.GITHUB_WRITE_ACCESS_TOKEN }}
- - name: Fetching gh-pages branch
- run: |
- git fetch origin gh-pages
- name: Initializing submodules
run: ./scripts/git/submodule_versions.py init
- name: Installing Ninja build
@@ -45,6 +42,9 @@
./build_tools/cmake/build_docs.sh
# Patch the MarkDown files with front matter for rendering
./scripts/prepare_doc_publication.py ${IREE_DOC_BUILD_DIR}/doc
+ - name: Fetching gh-pages branch
+ run: |
+ git fetch origin gh-pages
- name: Updating gh-pages branch
run: |
git checkout -f gh-pages
diff --git a/.github/workflows/update_tf.yml b/.github/workflows/update_tf.yml
index 86c55d9..b116f99 100644
--- a/.github/workflows/update_tf.yml
+++ b/.github/workflows/update_tf.yml
@@ -54,6 +54,4 @@
Automated submodule bump from .github/workflows/update_tf.yml
committer: "Submodule Update Action <iree-github-actions-bot@google.com>"
- # TODO(gcmn): Figure out a way to assign this to someone dynamically.
- reviewers: gmngeoffrey
branch: "auto_submodule_update"
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..9ef1dc1
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,4 @@
+[style]
+ based_on_style = google
+ column_limit = 80
+ indent_width = 2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 359d641..1a2377d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,6 +93,10 @@
if( IREE_HAL_DRIVERS_TO_BUILD STREQUAL "all" )
set( IREE_HAL_DRIVERS_TO_BUILD ${IREE_ALL_HAL_DRIVERS} )
+ # For cross compilation towards Android, we don't want LLVM JIT HAL driver.
+ if(ANDROID)
+ list(REMOVE_ITEM IREE_HAL_DRIVERS_TO_BUILD LLVM)
+ endif()
endif()
message(STATUS "Building HAL drivers ${IREE_HAL_DRIVERS_TO_BUILD}")
@@ -112,8 +116,8 @@
# List of all target backends to be built by default:
set(IREE_ALL_TARGET_BACKENDS
# TODO(scotttodd): LLVMAOT
- LLVMIR
- Vulkan_SPIRV
+ LLVM-IR
+ Vulkan-SPIRV
VMLA
)
@@ -202,7 +206,6 @@
include(iree_cc_embed_data)
include(iree_bytecode_module)
include(iree_multipy)
-include(iree_py_test)
include(iree_lit_test)
include(iree_add_all_subdirs)
include(iree_check_test)
@@ -287,6 +290,8 @@
message(STATUS "Adding bundled LLVM source dependency")
add_iree_mlir_src_dep("third_party/llvm-project")
elseif(${IREE_MLIR_DEP_MODE} STREQUAL "INSTALLED")
+ # Deps of installed MLIR/LLVM packages.
+ find_package(ZLIB) # See: https://reviews.llvm.org/D79219
message(STATUS "Looking for installed MLIR/LLVM packages (configure with MLIR_DIR variable)")
find_package(MLIR REQUIRED CONFIG)
message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
@@ -325,7 +330,8 @@
find_package(PythonInterp 3 REQUIRED)
endif()
if(${IREE_BUILD_PYTHON_BINDINGS})
- find_package(PythonLibs 3 REQUIRED)
+ # Note: Optional because python libs can be manually specified.
+ find_package(PythonLibs 3)
endif()
list(APPEND CMAKE_MODULE_PATH
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a906067..0f7897d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -58,7 +58,7 @@
## Peculiarities
Our documentation on
-[repository management](https://github.com/google/iree/blob/main/docs/repository_management.md)
+[repository management](https://github.com/google/iree/blob/main/docs/developing_iree/repository_management.md)
has more information on some of the oddities in our repository setup and
workflows. For the most part, these should be transparent to normal developer
workflows.
diff --git a/README.md b/README.md
index e6de0e2..129547c 100644
--- a/README.md
+++ b/README.md
@@ -37,8 +37,8 @@
working on enabling macOS support. For deployment, IREE aims to additionally
cover Android and iOS.
-Please see the [Getting Started](https://google.github.io/iree/GetStarted) pages
-on IREE's [documentation hub](https://google.github.io/iree) to configure,
+Please see the [Getting Started](https://google.github.io/iree/get-started)
+pages on IREE's [documentation hub](https://google.github.io/iree) to configure,
compile, and run IREE in your favorite development environment!
## Documentation and Talks
@@ -68,7 +68,7 @@
The architecture of IREE is best illustrated by the following picture:
-
+
Being compilation-based means IREE does not have a traditional runtime that
dispatches "ops" to their fat kernel implementations. What IREE provides is a
@@ -100,8 +100,8 @@
## Roadmap and Milestones
IREE is still at its early stage; we have lots of exciting future plans. Please
-check out the [long-term design roadmap](./docs/roadmap_design.md) and
-[short-term focus areas](./docs/roadmap.md).
+check out the [long-term design roadmap](./docs/design_roadmap.md) and
+[short-term focus areas](./docs/milestones.md).
We use [GitHub Projects](https://github.com/google/iree/projects) to track
various IREE components and
diff --git a/SUBMODULE_VERSIONS b/SUBMODULE_VERSIONS
index ba8dd45..d74192b 100644
--- a/SUBMODULE_VERSIONS
+++ b/SUBMODULE_VERSIONS
@@ -4,15 +4,15 @@
a5d9d0f7d368054fd1691aedf1db4116efcc233e third_party/flatbuffers
4fb0ff7069bd88ee85902f4d0bb62794e5f6d021 third_party/flatcc
f2fb48c3b3d79a75a88a99fba6576b25d42ec528 third_party/googletest
-7ca9b589c45302feb28c0b3b0e80088c0901bb40 third_party/llvm-project
+eed333149d178b69fdaf39b9419b7ca032520182 third_party/llvm-project
17b12a4481daa150e2d1ea3ada086b551b856707 third_party/marl
-67f3ccebee84f3488b46a8d3ac005178c52ff264 third_party/mlir-emitc
+80885f899e12d55a45561ef758eea47bb340dbf1 third_party/mlir-emitc
80d452484c5409444b0ec19383faa84bb7a4d351 third_party/pybind11
9f53ba413e6fc879236dcaa3e008915973d67a4f third_party/ruy
-b73f111094da3e380a1774b56b15f16c90ae8e23 third_party/sdl2
+a1390ed39ec77ecfb574bc6fcd5bfc5e3adbdea9 third_party/sdl2
f8bf11a0253a32375c32cad92c841237b96696c0 third_party/spirv_headers
57eb48aed36160c4876bc8310d9ca84d42ee9e2a third_party/swiftshader
-e36aca0132fbcde0bc820d56185e3078f97a879d third_party/tensorflow
+e29e1f4e574caab071e93cfb91fa9ee0944cd87c third_party/tensorflow
864d86e8b6d21449474db5e9313dbff90aa9c24f third_party/tracy
9bd3f561bcee3f01d22912de10bb07ce4e23d378 third_party/vulkan_headers
909f36b714c9239ee0b112a321220213a474ba53 third_party/vulkan_memory_allocator
diff --git a/WORKSPACE b/WORKSPACE
index 702a087..cf098ce 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -63,7 +63,7 @@
rbe_autoconfig(
name = "rbe_default",
base_container_digest = "sha256:1a8ed713f40267bb51fe17de012fa631a20c52df818ccb317aaed2ee068dfc61",
- digest = "sha256:b59d8cc422b03524394d4d05e443bf38d4fe96fab06197b34174de01572e8161",
+ digest = "sha256:bc2d61ad05453928e67b434ae019e7d050dda46c091270f2b81b2f09da2276ce",
registry = "gcr.io",
repository = "iree-oss/rbe-toolchain",
use_checked_in_confs = "Force",
diff --git a/bindings/python/build_tools/cmake/iree_py_test.cmake b/bindings/python/build_tools/cmake/iree_py_test.cmake
deleted file mode 100644
index 0fbb6f0..0000000
--- a/bindings/python/build_tools/cmake/iree_py_test.cmake
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-include(CMakeParseArguments)
-
-# iree_py_test()
-#
-# CMake function to imitate Bazel's iree_py_test rule.
-#
-# Parameters:
-# NAME: name of test
-# SRCS: List of source file
-# DEPS: List of deps the test requires
-# LABELS: Additional labels to apply to the test. The package path is added
-# automatically.
-
-function(iree_py_test)
- if(NOT IREE_BUILD_TESTS)
- return()
- endif()
-
- cmake_parse_arguments(
- _RULE
- ""
- "NAME"
- "SRCS;DEPS;LABELS"
- ${ARGN}
- )
-
- iree_package_name(_PACKAGE_NAME)
- set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}")
-
- iree_package_ns(_PACKAGE_NS)
- string(REPLACE "::" "/" _PACKAGE_PATH ${_PACKAGE_NS})
- set(_NAME_PATH "${_PACKAGE_PATH}:${_RULE_NAME}")
-
- add_test(
- NAME ${_NAME_PATH}
- COMMAND ${CMAKE_SOURCE_DIR}/build_tools/cmake/run_test.sh ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/${_RULE_SRCS}"
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- )
-
- list(APPEND _RULE_LABELS "${_PACKAGE_PATH}")
- set_property(TEST ${_NAME_PATH} PROPERTY LABELS "${_RULE_LABELS}")
- set_property(TEST ${_NAME_PATH} PROPERTY ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/bindings/python:$ENV{PYTHONPATH};TEST_TMPDIR=${_NAME}_test_tmpdir")
- # TODO(marbre): Find out how to add deps to tests.
- # Similar to _RULE_DATA in iree_lit_test().
-
-endfunction()
diff --git a/bindings/python/build_tools/python/generate_build.py b/bindings/python/build_tools/python/generate_build.py
index 6d8cff4..6705cfd 100644
--- a/bindings/python/build_tools/python/generate_build.py
+++ b/bindings/python/build_tools/python/generate_build.py
@@ -64,7 +64,7 @@
cc_library(
name = "python_headers",
- hdrs = glob(["include/*.h"]),
+ hdrs = glob(["include/**/*.h"]),
srcs = [{extra_srcs}],
includes = ["include"],
linkopts = [],
diff --git a/bindings/python/pyiree/compiler/compiler.cc b/bindings/python/pyiree/compiler/compiler.cc
index cfd8289..bb356f9 100644
--- a/bindings/python/pyiree/compiler/compiler.cc
+++ b/bindings/python/pyiree/compiler/compiler.cc
@@ -337,7 +337,7 @@
mlir::applyPassManagerCLOptions(pass_manager);
auto crash_reproducer_path = context_->crash_reproducer_path();
if (crash_reproducer_path) {
- pass_manager.enableCrashReproducerGeneration(*crash_reproducer_path);
+ pass_manager.enableCrashReproducerGeneration(*crash_reproducer_path, true);
}
mlir::iree_compiler::IREE::HAL::TargetOptions executable_options;
diff --git a/bindings/python/pyiree/rt/system_api.py b/bindings/python/pyiree/rt/system_api.py
index 555ce32..aaea01f 100644
--- a/bindings/python/pyiree/rt/system_api.py
+++ b/bindings/python/pyiree/rt/system_api.py
@@ -81,14 +81,12 @@
device = driver.create_default_device()
except Exception as ex:
print(
- "Could not create default driver device %s: %r" % (
- driver_name, ex),
+ "Could not create default driver device %s: %r" % (driver_name, ex),
file=sys.stderr)
driver_exceptions[driver_name] = ex
continue
- print(
- "Created IREE driver %s: %r" % (driver_name, driver), file=sys.stderr)
+ print("Created IREE driver %s: %r" % (driver_name, driver), file=sys.stderr)
return driver
# All failed.
diff --git a/build_tools/bazel/build_tensorflow.sh b/build_tools/bazel/build_tensorflow.sh
index 07b0eb8..be82227 100755
--- a/build_tools/bazel/build_tensorflow.sh
+++ b/build_tools/bazel/build_tensorflow.sh
@@ -43,6 +43,11 @@
--test_env=IREE_LLVMJIT_DISABLE=$IREE_LLVMJIT_DISABLE
--test_env=IREE_VULKAN_DISABLE=$IREE_VULKAN_DISABLE
)
+# Pass in VK_ICD_FILENAMES if it exists so that the Vulkan loader can find the
+# Vulkan implementation.
+if [[ -v VK_ICD_FILENAMES ]]; then
+ test_env_args+=(--test_env=VK_ICD_FILENAMES=$VK_ICD_FILENAMES)
+fi
declare -a default_build_tag_filters=("-nokokoro")
declare -a default_test_tag_filters=("-nokokoro")
@@ -87,8 +92,6 @@
--config=generic_clang \
--build_tag_filters="${BUILD_TAG_FILTERS?}" \
--test_tag_filters="${TEST_TAG_FILTERS?}" \
+ --config=rs \
--test_output=errors \
--keep_going
- # TODO: Enable result store once the Kokoro VMs used for this test have the
- # appropriate auth.
- # --config=rs
diff --git a/build_tools/bazel/iree_flatcc.bzl b/build_tools/bazel/iree_flatcc.bzl
index c355563..e091af6 100644
--- a/build_tools/bazel/iree_flatcc.bzl
+++ b/build_tools/bazel/iree_flatcc.bzl
@@ -24,19 +24,19 @@
flatcc_rt = "@com_github_dvidelabs_flatcc//:runtime"
flags = [
- "-o$(RULEDIR)",
+ "-o$(RULEDIR)",
] + flatcc_args
out_stem = "%s" % (srcs[0].replace(".fbs", ""))
outs = []
for arg in flags:
- if arg == "--reader":
- outs += ["%s_reader.h" % (out_stem)]
- if arg == "--builder":
- outs += ["%s_builder.h" % (out_stem)]
- if arg == "--verifier":
- outs += ["%s_verifier.h" % (out_stem)]
+ if arg == "--reader":
+ outs += ["%s_reader.h" % (out_stem)]
+ if arg == "--builder":
+ outs += ["%s_builder.h" % (out_stem)]
+ if arg == "--verifier":
+ outs += ["%s_verifier.h" % (out_stem)]
native.genrule(
name = name + "_gen",
@@ -50,7 +50,7 @@
name = name,
hdrs = outs,
deps = [
- flatcc_rt,
+ flatcc_rt,
],
testonly = testonly,
**kwargs
diff --git a/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel b/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
index bade7ab..befc20c 100644
--- a/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
+++ b/build_tools/bazel/third_party_import/llvm-project/overlay/llvm/BUILD.bazel
@@ -155,10 +155,10 @@
name = "InstCombineTableGen",
tbl_outs = [(
"-gen-searchable-tables",
- "lib/Transforms/InstCombine/InstCombineTables.inc",
+ "lib/Target/AMDGPU/InstCombineTables.inc",
)],
tblgen = ":llvm-tblgen",
- td_file = "lib/Transforms/InstCombine/InstCombineTables.td",
+ td_file = "lib/Target/AMDGPU/InstCombineTables.td",
td_srcs = glob([
"include/llvm/CodeGen/*.td",
"include/llvm/IR/Intrinsics*.td",
@@ -721,6 +721,7 @@
"lib/Analysis/*.h",
],
exclude = [
+ "lib/Analysis/DevelopmentModeInlineAdvisor.cpp",
"lib/Analysis/MLInlineAdvisor.cpp",
"lib/Analysis/ReleaseModeModelRunner.cpp",
"lib/Analysis/TFUtils.cpp",
@@ -3187,6 +3188,7 @@
]),
copts = llvm_copts,
deps = [
+ ":BinaryFormat",
":DebugInfoCodeView",
":MC",
":Object",
diff --git a/build_tools/bazel/third_party_import/llvm-project/overlay/mlir/BUILD.bazel b/build_tools/bazel/third_party_import/llvm-project/overlay/mlir/BUILD.bazel
index ec0574f..7e42a4b 100644
--- a/build_tools/bazel/third_party_import/llvm-project/overlay/mlir/BUILD.bazel
+++ b/build_tools/bazel/third_party_import/llvm-project/overlay/mlir/BUILD.bazel
@@ -387,7 +387,7 @@
"include/mlir/Interfaces/CallInterfaces.td",
"include/mlir/Interfaces/ControlFlowInterfaces.td",
"include/mlir/Interfaces/SideEffectInterfaces.td",
- "include/mlir/Interfaces/VectorUnrollInterface.td",
+ "include/mlir/Interfaces/VectorInterfaces.td",
"include/mlir/Interfaces/ViewLikeInterface.td",
":OpBaseTdFiles",
],
@@ -500,6 +500,7 @@
deps = [
":Affine",
":IR",
+ ":Support",
"@llvm-project//llvm:Support",
],
)
@@ -647,13 +648,13 @@
)
cc_library(
- name = "VectorUnrollInterface",
- srcs = ["lib/Interfaces/VectorUnrollInterface.cpp"],
- hdrs = ["include/mlir/Interfaces/VectorUnrollInterface.h"],
+ name = "VectorInterfaces",
+ srcs = ["lib/Interfaces/VectorInterfaces.cpp"],
+ hdrs = ["include/mlir/Interfaces/VectorInterfaces.h"],
includes = ["include"],
deps = [
":IR",
- ":VectorUnrollInterfaceIncGen",
+ ":VectorInterfacesIncGen",
],
)
@@ -739,6 +740,7 @@
":MLIRShapeCanonicalizationIncGen",
":ShapeOpsIncGen",
":SideEffectInterfaces",
+ ":StandardOps",
":Support",
"@llvm-project//llvm:Support",
],
@@ -757,32 +759,12 @@
":Pass",
":SCFDialect",
":Shape",
- ":ShapeToStandardPatternsIncGen",
":StandardOps",
":Support",
":Transforms",
],
)
-gentbl(
- name = "ShapeToStandardPatternsIncGen",
- strip_include_prefix = "include/mlir/Conversion/ShapeToStandard",
- tbl_outs = [
- (
- "-gen-rewriters",
- "include/mlir/Conversion/ShapeToStandard/ShapeToStandardPatterns.inc",
- ),
- ],
- tblgen = ":mlir-tblgen",
- td_file = "lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td",
- td_srcs = [
- ":StdOpsTdFiles",
- "include/mlir/Dialect/Shape/IR/ShapeBase.td",
- "include/mlir/Dialect/Shape/IR/ShapeOps.td",
- "include/mlir/Interfaces/InferTypeOpInterface.td",
- ],
-)
-
cc_library(
name = "ShapeToSCF",
srcs = glob([
@@ -855,7 +837,7 @@
":SideEffectInterfaces",
":StandardOpsIncGen",
":Support",
- ":VectorUnrollInterface",
+ ":VectorInterfaces",
":ViewLikeInterface",
"@llvm-project//llvm:Support",
],
@@ -918,9 +900,9 @@
":SideEffectInterfaces",
":StandardOps",
":Support",
+ ":VectorInterfaces",
":VectorOpsIncGen",
":VectorTransformPatternsIncGen",
- ":VectorUnrollInterface",
"@llvm-project//llvm:Support",
],
)
@@ -2127,20 +2109,20 @@
)
gentbl(
- name = "VectorUnrollInterfaceIncGen",
+ name = "VectorInterfacesIncGen",
strip_include_prefix = "include",
tbl_outs = [
(
"-gen-op-interface-decls",
- "include/mlir/Interfaces/VectorUnrollInterface.h.inc",
+ "include/mlir/Interfaces/VectorInterfaces.h.inc",
),
(
"-gen-op-interface-defs",
- "include/mlir/Interfaces/VectorUnrollInterface.cpp.inc",
+ "include/mlir/Interfaces/VectorInterfaces.cpp.inc",
),
],
tblgen = ":mlir-tblgen",
- td_file = "include/mlir/Interfaces/VectorUnrollInterface.td",
+ td_file = "include/mlir/Interfaces/VectorInterfaces.td",
td_srcs = [
":OpBaseTdFiles",
],
@@ -3271,6 +3253,7 @@
":QuantPassIncGen",
":SideEffectInterfaces",
":StandardOps",
+ ":TransformUtils",
"@llvm-project//llvm:Support",
],
)
@@ -3586,7 +3569,7 @@
name = "VectorOpsTdFiles",
srcs = [
"include/mlir/Dialect/Vector/VectorOps.td",
- "include/mlir/Interfaces/VectorUnrollInterface.td",
+ "include/mlir/Interfaces/VectorInterfaces.td",
":AffineOpsTdFiles",
":OpBaseTdFiles",
],
@@ -3716,7 +3699,7 @@
"include/mlir/Interfaces/ControlFlowInterfaces.h",
"include/mlir/Interfaces/ControlFlowInterfaces.td",
"include/mlir/Interfaces/SideEffectInterfaces.td",
- "include/mlir/Interfaces/VectorUnrollInterface.td",
+ "include/mlir/Interfaces/VectorInterfaces.td",
"include/mlir/Interfaces/ViewLikeInterface.td",
"include/mlir/Dialect/LLVMIR/LLVMOpBase.td",
"include/mlir/Dialect/StandardOps/IR/Ops.td",
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
index d2df056..bc40c97 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py
@@ -237,8 +237,8 @@
def _convert_flatcc_args_block(self, flatcc_args):
if not flatcc_args:
return ""
- flatcc_args = "\n".join([f' "{flatcc_arg}"'
- for flatcc_arg in flatcc_args])
+ flatcc_args = "\n".join(
+ [f' "{flatcc_arg}"' for flatcc_arg in flatcc_args])
return f" FLATCC_ARGS\n{flatcc_args}\n"
def _convert_unimplemented_function(self, function, details=""):
diff --git a/build_tools/cmake/build_docs.sh b/build_tools/cmake/build_docs.sh
index d4a3c84..191ed17 100755
--- a/build_tools/cmake/build_docs.sh
+++ b/build_tools/cmake/build_docs.sh
@@ -49,34 +49,12 @@
cd ${ROOT_DIR?}
+cp README.md ${BUILD_DIR}/doc/index.md
+cp -rf docs/* ${BUILD_DIR}/doc/
+
# Update op_coverage.md
scripts/update_op_coverage.py ${BUILD_DIR}
# Update e2e_coverage.md
PYTHON_BIN=`which python3` scripts/update_e2e_coverage.py ${BUILD_DIR}
-# Copy a curated list of docs to publish. This is expected to cover all docs
-# under docs/ after they are refreshed.
-
-cp README.md ${BUILD_DIR}/doc/index.md
-cp docs/IREE-Architecture.svg ${BUILD_DIR}/doc/
-
-cp docs/roadmap.md ${BUILD_DIR}/doc/
-cp docs/roadmap_design.md ${BUILD_DIR}/doc/
-cp docs/developer_overview.md ${BUILD_DIR}/doc/
-cp docs/testing_guide.md ${BUILD_DIR}/doc/
-cp docs/iree_community.md ${BUILD_DIR}/doc/
-
-mkdir -p ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_windows_bazel.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_windows_cmake.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_windows_vulkan.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_linux_bazel.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_linux_cmake.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_linux_vulkan.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_macos_bazel.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_macos_cmake.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_android_cmake.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/getting_started_python.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/generic_vulkan_env_setup.md ${BUILD_DIR}/doc/GetStarted/
-cp docs/GetStarted/cmake_options_and_variables.md ${BUILD_DIR}/doc/GetStarted/
diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake
index aa794bd..93fd934 100644
--- a/build_tools/cmake/iree_check_test.cmake
+++ b/build_tools/cmake/iree_check_test.cmake
@@ -173,6 +173,17 @@
${ARGN}
)
+
+ string(TOUPPER ${_RULE_DRIVER} _UPPERCASE_DRIVER)
+ if(NOT IREE_HAL_DRIVER_${_UPPERCASE_DRIVER})
+ return()
+ endif()
+
+ string(TOUPPER ${_RULE_TARGET_BACKEND} _UPPERCASE_TARGET_BACKEND)
+ if(NOT IREE_TARGET_BACKEND_${_UPPERCASE_TARGET_BACKEND})
+ return()
+ endif()
+
foreach(_SRC IN LISTS _RULE_SRCS)
set(_TEST_NAME "${_RULE_NAME}_${_SRC}")
iree_check_test(
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index c39db21..7563c1f 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -250,3 +250,15 @@
${PROJECT_BINARY_DIR}/build_tools/third_party/tensorflow
${PROJECT_BINARY_DIR}/build_tools/third_party/tensorflow/tensorflow/compiler/mlir/hlo/include/
)
+
+#-------------------------------------------------------------------------------
+# Third party: mlir-emitc
+#-------------------------------------------------------------------------------
+
+if(IREE_ENABLE_EMITC)
+ list(APPEND IREE_COMMON_INCLUDE_DIRS
+ ${PROJECT_SOURCE_DIR}/third_party/mlir-emitc/include
+ ${PROJECT_BINARY_DIR}/third_party/mlir-emitc/include
+ )
+ add_definitions(-DIREE_HAVE_EMITC_DIALECT)
+endif()
diff --git a/build_tools/cmake/iree_multipy.cmake b/build_tools/cmake/iree_multipy.cmake
index a53995a..a7fd1f1 100644
--- a/build_tools/cmake/iree_multipy.cmake
+++ b/build_tools/cmake/iree_multipy.cmake
@@ -22,14 +22,17 @@
# Configure the defaults.
# Note that this is using the pybind11 configuration vars, which creates
# a fragile dependency. It would be better to derive these locally.
- set(IREE_MULTIPY_DEFAULT_INCLUDE_DIRS "${PYTHON_INCLUDE_DIRS}" CACHE INTERNAL "Python include dirs" )
- set(IREE_MULTIPY_DEFAULT_LIBRARIES "${PYTHON_LIBRARIES}" CACHE INTERNAL "Python libraries")
- set(IREE_MULTIPY_DEFAULT_PREFIX "${PYTHON_MODULE_PREFIX}" CACHE INTERNAL "Python module prefix")
- set(IREE_MULTIPY_DEFAULT_SUFFIX "${PYTHON_MODULE_SUFFIX}" CACHE INTERNAL "Python module suffix")
- set(IREE_MULTIPY_DEFAULT_EXTENSION "${PYTHON_MODULE_EXTENSION}" CACHE INTERNAL "Python module extension")
+ if(PYTHONLIBS_FOUND)
+ set(IREE_MULTIPY_DEFAULT_EXECUTABLE "${PYTHON_EXECUTABLE}" CACHE INTERNAL "Python executable" )
+ set(IREE_MULTIPY_DEFAULT_INCLUDE_DIRS "${PYTHON_INCLUDE_DIRS}" CACHE INTERNAL "Python include dirs" )
+ set(IREE_MULTIPY_DEFAULT_LIBRARIES "${PYTHON_LIBRARIES}" CACHE INTERNAL "Python libraries")
+ set(IREE_MULTIPY_DEFAULT_PREFIX "${PYTHON_MODULE_PREFIX}" CACHE INTERNAL "Python module prefix")
+ set(IREE_MULTIPY_DEFAULT_SUFFIX "${PYTHON_MODULE_SUFFIX}" CACHE INTERNAL "Python module suffix")
+ set(IREE_MULTIPY_DEFAULT_EXTENSION "${PYTHON_MODULE_EXTENSION}" CACHE INTERNAL "Python module extension")
+ endif()
if(IREE_MULTIPY_VERSIONS)
- set(IREE_MULTIPY_VERSIONS_EFFECTIVE "${IREE_MULTIPY_VERSIONS}")
+ set(IREE_MULTIPY_VERSIONS_EFFECTIVE "${IREE_MULTIPY_VERSIONS}" CACHE INTERNAL "Python extension versions")
else()
message(STATUS "Multi-python extension versions not found: using defaults")
set(IREE_MULTIPY_VERSIONS_EFFECTIVE "DEFAULT" CACHE INTERNAL "Python extension versions")
@@ -39,18 +42,22 @@
message(STATUS "Multipy extension versions: ${IREE_MULTIPY_VERSIONS_EFFECTIVE}")
foreach(V ${IREE_MULTIPY_VERSIONS_EFFECTIVE})
message(STATUS " - Multipy version ${V}")
+ message(STATUS " : EXECUTABLE = ${IREE_MULTIPY_${V}_EXECUTABLE}")
message(STATUS " : INCLUDE_DIRS = ${IREE_MULTIPY_${V}_INCLUDE_DIRS}")
message(STATUS " : LIBRARIES = ${IREE_MULTIPY_${V}_LIBRARIES}")
message(STATUS " : PREFIX = ${IREE_MULTIPY_${V}_PREFIX}")
message(STATUS " : SUFFIX = ${IREE_MULTIPY_${V}_SUFFIX}")
message(STATUS " : EXTENSION = ${IREE_MULTIPY_${V}_EXTENSION}")
- # Only INCLUDE_DIRS and EXTENSION are needed for all configs.
+ # Check for required settings.
if(NOT IREE_MULTIPY_${V}_INCLUDE_DIRS)
- message(FATAL "MULTIPY config ${V}: No IREE_MULTIPY_{VER}_INCLUDE_DIRS var")
+ message(FATAL " MULTIPY version ${V}: No IREE_MULTIPY_${VER}_EXECUTABLE var")
+ endif()
+ if(NOT IREE_MULTIPY_${V}_INCLUDE_DIRS)
+ message(FATAL " MULTIPY version ${V}: No IREE_MULTIPY_${VER}_INCLUDE_DIRS var")
endif()
if(NOT IREE_MULTIPY_${V}_EXTENSION)
- message(FATAL "MULTIPY config ${V}: No IREE_MULTIPY_{VER}_EXTENSION var")
+ message(FATAL " MULTIPY version ${V}: No IREE_MULTIPY_${VER}_EXTENSION var")
endif()
endforeach()
endfunction()
@@ -100,7 +107,18 @@
foreach(V ${IREE_MULTIPY_VERSIONS_EFFECTIVE})
set(VER_NAME "${_NAME}__${V}")
- add_library(${VER_NAME} SHARED ${ARG_SRCS})
+
+ # If configured to link against libraries, build in SHARED mode (which
+ # disallows undefined symbols). Otherwise, build in MODULE mode, which
+ # does not enforce that. This should naturally do the right thing on
+ # each platform based on whether configured with a list of libraries to
+ # link or not.
+ set(LIBRARY_TYPE MODULE)
+ if(IREE_MULTIPY_${V}_LIBRARIES)
+ set(LIBRARY_TYPE SHARED)
+ endif()
+
+ add_library(${VER_NAME} ${LIBRARY_TYPE} ${ARG_SRCS})
add_dependencies(${_NAME} ${VER_NAME})
set_target_properties(
${VER_NAME} PROPERTIES
@@ -132,10 +150,10 @@
# Track target and deps, use in iree_complete_py_extension_link_options() later.
# See iree_complete_py_extension_link_options() in iree_py_extension.cmake
# TODO: Move that implementation here.
- list(TRANSFORM ARG_PYEXT_DEPS APPEND "__${V}")
+ set(TRANSFORMED_PYEXT_DEPS "${ARG_PYEXT_DEPS}")
+ list(TRANSFORM TRANSFORMED_PYEXT_DEPS APPEND "__${V}")
set_property(GLOBAL APPEND PROPERTY _IREE_PY_EXTENSION_NAMES "${VER_NAME}")
- set_property(TARGET ${VER_NAME} PROPERTY DIRECT_DEPS ${ARG_DEPS} ${ARG_PYEXT_DEPS})
-
+ set_property(TARGET ${VER_NAME} PROPERTY DIRECT_DEPS ${ARG_DEPS} ${TRANSFORMED_PYEXT_DEPS})
_alias_iree_pyext_library("${ARG_NAME}" "${V}" ${VER_NAME})
endforeach()
endfunction()
@@ -167,11 +185,12 @@
"${IREE_MULTIPY_${V}_INCLUDE_DIRS}"
"$<BUILD_INTERFACE:${IREE_COMMON_INCLUDE_DIRS}>"
)
- list(TRANSFORM ARG_PYEXT_DEPS APPEND "__${V}")
+ set(TRANSFORMED_PYEXT_DEPS "${ARG_PYEXT_DEPS}")
+ list(TRANSFORM TRANSFORMED_PYEXT_DEPS APPEND "__${V}")
target_link_libraries(${VER_NAME}
PUBLIC
${ARG_DEPS}
- ${ARG_PYEXT_DEPS}
+ ${TRANSFORMED_PYEXT_DEPS}
PRIVATE
${IREE_DEFAULT_LINKOPTS}
)
@@ -248,6 +267,55 @@
${name} PROPERTIES CXX_VISIBILITY_PRESET "hidden")
endfunction()
+# iree_py_test()
+#
+# CMake function to imitate Bazel's iree_py_test rule.
+#
+# Parameters:
+# NAME: name of test
+# SRCS: List of source files
+# DEPS: List of deps the test requires
+# LABELS: Additional labels to apply to the test. The package path is added
+# automatically.
+
+function(iree_py_test)
+ if(NOT IREE_BUILD_TESTS)
+ return()
+ endif()
+
+ cmake_parse_arguments(
+ _RULE
+ ""
+ "NAME"
+ "SRCS;DEPS;LABELS"
+ ${ARGN}
+ )
+
+ iree_package_name(_PACKAGE_NAME)
+ set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}")
+
+ iree_package_ns(_PACKAGE_NS)
+ string(REPLACE "::" "/" _PACKAGE_PATH ${_PACKAGE_NS})
+ set(_NAME_PATH "${_PACKAGE_PATH}:${_RULE_NAME}")
+ list(APPEND _RULE_LABELS "${_PACKAGE_PATH}")
+
+ foreach(V ${IREE_MULTIPY_VERSIONS_EFFECTIVE})
+ set(VER_NAME "${_NAME_PATH}__${V}")
+ add_test(
+ NAME ${VER_NAME}
+ COMMAND
+ "${CMAKE_SOURCE_DIR}/build_tools/cmake/run_test.${IREE_HOST_SCRIPT_EXT}"
+ "${IREE_MULTIPY_${V}_EXECUTABLE}"
+ "${CMAKE_CURRENT_SOURCE_DIR}/${_RULE_SRCS}"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ )
+
+ set_property(TEST ${VER_NAME} PROPERTY LABELS "${_RULE_LABELS}")
+ set_property(TEST ${VER_NAME} PROPERTY ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/bindings/python:$ENV{PYTHONPATH};TEST_TMPDIR=${_NAME}_${V}_test_tmpdir")
+ # TODO(marbre): Find out how to add deps to tests.
+ # Similar to _RULE_DATA in iree_lit_test().
+ endforeach()
+endfunction()
###############################################################################
# Always-link/transitive dependency management
@@ -378,7 +446,6 @@
${_STANDARD_DEPS}
PRIVATE
${_RULE_LINKOPTS}
- ${PYTHON_LIBRARY}
)
else()
target_link_libraries(${_NAME}
@@ -389,7 +456,6 @@
${_STANDARD_DEPS}
PRIVATE
${_RULE_LINKOPTS}
- ${PYTHON_LIBRARY}
)
endif()
endforeach(_NAME)
diff --git a/build_tools/cmake/run_android_test.sh b/build_tools/cmake/run_android_test.sh
index 292890c..0c7510d 100755
--- a/build_tools/cmake/run_android_test.sh
+++ b/build_tools/cmake/run_android_test.sh
@@ -35,7 +35,7 @@
set -x
set -e
-adb push $TEST_EXECUTABLE $TEST_ANDROID_ABS_DIR/$(basename $TEST_EXECUTABLE)
+adb push $TEST_EXECUTABLE $TEST_ANDROID_ABS_DIR/$(basename $TEST_EXECUTABLE) 1>/dev/null
if [ -n "$TEST_DATA" ]; then
adb push $TEST_DATA $TEST_ANDROID_ABS_DIR/$(basename $TEST_DATA)
diff --git a/build_tools/docker/bazel/Dockerfile b/build_tools/docker/bazel/Dockerfile
index e9a587c..c42521a 100644
--- a/build_tools/docker/bazel/Dockerfile
+++ b/build_tools/docker/bazel/Dockerfile
@@ -27,40 +27,49 @@
FROM ubuntu:18.04
WORKDIR /usr/src/iree
-RUN apt-get update
-
# Set environment variables.
ENV CXX clang++
ENV CC clang
ENV PYTHON_BIN /usr/bin/python3
ENV IREE_LLVMAOT_LINKER_PATH /usr/bin/ld
-# Install git for updating IREE's submodules.
-RUN apt-get install -y git
+RUN apt-get update \
+ && apt-get install -y \
+ # git for updating IREE's submodules.
+ git \
+ # utilities for later installations
+ unzip \
+ zip \
+ wget \
+ # core IREE dependencies.
+ clang \
+ libsdl2-dev
+
+# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
+# thing is then feel free. The warning complains about parsing apt-key output,
+# which we're not even doing.
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
# Install Bazel.
# https://docs.bazel.build/versions/master/install-ubuntu.html
-ARG BAZEL_VERSION=2.1.0
-RUN apt-get install -y unzip zip wget \
- && wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION?}/bazel-${BAZEL_VERSION?}-installer-linux-x86_64.sh" \
- && chmod +x "bazel-${BAZEL_VERSION?}-installer-linux-x86_64.sh" \
- && "./bazel-${BAZEL_VERSION?}-installer-linux-x86_64.sh" --user \
- && rm "bazel-${BAZEL_VERSION?}-installer-linux-x86_64.sh"
-# Install a newer version of Bazel. We don't need the full installation now.
-# Just need to provide a different version for the version-identification
-# wrapper script to find in /root/.bazel/bin
+ARG BAZEL_VERSION=3.3.1
+# Change to a new version if upgrading Bazel.
ARG NEW_BAZEL_VERSION=3.3.1
-RUN cd "/root/.bazel/bin" \
- && wget "https://releases.bazel.build/${NEW_BAZEL_VERSION?}/release/bazel-${NEW_BAZEL_VERSION?}-linux-x86_64" \
- && chmod +x "bazel-${NEW_BAZEL_VERSION?}-linux-x86_64"
-# ENV does not allow ${variable?} syntax.
-ENV PATH "/root/bin:${PATH}"
+RUN wget -qO - https://bazel.build/bazel-release.pub.gpg | apt-key add - \
+ && echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" \
+ | tee /etc/apt/sources.list.d/bazel.list \
+ && apt-get update \
+ # Install Bazel pinned at the version we want. Optionally install an
+ # additional version of Bazel to ease upgrades (modify NEW_BAZEL_VERSION
+ # above). Bazel does some shenanigans to select the correct version based on
+ # your .bazelversion file. When upgrading, we therefore need to have both the
+ # old and new version. When the versions are the same this second installation
+ # is effectively a noop.
+ && apt-get install "bazel=${BAZEL_VERSION?}" "bazel-${NEW_BAZEL_VERSION?}"
-# Install core IREE dependencies.
-RUN apt-get install -y clang libsdl2-dev
-
-# Install python2 numpy. Temporary fix for issue #1737:
-# https://github.com/google/iree/issues/1737
-RUN apt-get install -y python-pip \
- && python -m pip install --upgrade pip \
- && python -m pip install numpy
+# TF requires python2 numpy at configure time...
+# TODO(#1737): Remove this
+RUN apt-get update \
+ && apt-get install -y python-pip \
+ && python -m pip install --upgrade pip \
+ && python -m pip install numpy
diff --git a/build_tools/docker/bazel_bindings/Dockerfile b/build_tools/docker/bazel_bindings/Dockerfile
index f7ef30f..8f07958 100644
--- a/build_tools/docker/bazel_bindings/Dockerfile
+++ b/build_tools/docker/bazel_bindings/Dockerfile
@@ -27,6 +27,11 @@
FROM gcr.io/iree-oss/bazel
# Install python3 and numpy.
-RUN apt-get install -y python3 python3-dev python3-pip python3-setuptools \
+RUN apt-get update \
+ && apt-get install -y \
+ python3 \
+ python3-dev \
+ python3-pip \
+ python3-setuptools \
&& python3 -m pip install --upgrade pip \
&& python3 -m pip install numpy
diff --git a/build_tools/docker/bazel_nvidia/Dockerfile b/build_tools/docker/bazel_nvidia/Dockerfile
new file mode 100644
index 0000000..86305c4
--- /dev/null
+++ b/build_tools/docker/bazel_nvidia/Dockerfile
@@ -0,0 +1,46 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An image for building IREE's tensorflow integrations using bazel and vulkan.
+
+# Build using:
+# docker build --tag gcr.io/iree-oss/bazel-nvidia \
+# build_tools/docker/bazel_nvidia/
+
+# Run interactively using the following, where IREE_WORKDIR is the path to your
+# local dev environment:
+# docker run -it --rm --entrypoint bash --volume ${IREE_WORKDIR}:/usr/src/iree/ \
+# gcr.io/iree-oss/bazel-nvidia
+
+# Set up the image and working directory.
+# We start from bazel-tensorflow so this image can be used for testing TensorFlow
+# integrations.
+FROM gcr.io/iree-oss/bazel-tensorflow
+
+# Additionally, we need to install the Vulkan SDK and the NVIDIA Vulkan driver.
+
+ARG VULKAN_SDK_VERSION=1.2.141
+
+# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
+# thing is then feel free. The warning complains about parsing apt-key output,
+# which we're not even doing.
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
+
+RUN wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc \
+ | apt-key add - \
+ && wget -qO \
+ "/etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ "http://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION?}/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ && apt-get update \
+ && DEBIAN_FRONTEND=noninteractive apt-get install -y vulkan-sdk nvidia-driver-440
diff --git a/build_tools/docker/bazel_swiftshader/Dockerfile b/build_tools/docker/bazel_swiftshader/Dockerfile
new file mode 100644
index 0000000..2af027d
--- /dev/null
+++ b/build_tools/docker/bazel_swiftshader/Dockerfile
@@ -0,0 +1,92 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An image for building IREE with tensorflow integrations using bazel and
+# running Vulkan tests on SwiftShader.
+
+# Build using:
+# docker build --tag gcr.io/iree-oss/bazel-swiftshader \
+# build_tools/docker/bazel_swiftshader/
+
+# Run interactively using the following, where IREE_WORKDIR is the path to your
+# local dev environment:
+# docker run -it --rm --entrypoint bash --volume ${IREE_WORKDIR}:/usr/src/iree/ \
+# gcr.io/iree-oss/bazel-swiftshader
+
+# Set up the image and working directory.
+FROM gcr.io/iree-oss/bazel-tensorflow
+
+# TODO(#2651): The following steps are copied from cmake, cmake-vulkan, and
+# cmake-swiftshader. We might want to consider using docker multi-stage
+# builds to factor them out.
+
+# Additionally, we need to install the Vulkan SDK.
+
+ARG VULKAN_SDK_VERSION=1.2.141
+
+# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
+# thing is then feel free. The warning complains about parsing apt-key output,
+# which we're not even doing.
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
+
+RUN wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc \
+ | apt-key add - \
+ && wget -qO \
+ "/etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ "http://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION?}/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ && apt-get update \
+ && apt-get install -y vulkan-sdk
+
+ARG SWIFTSHADER_COMMIT=6a8a74986c357b0c6fa0dfd2b4b9230af8d39d1a
+
+# Then compile and install SwiftShader.
+
+# cmake, ninja, and zlib are needed for compiling SwiftShader.
+RUN apt-get update && apt-get install -y cmake ninja-build zlib1g-dev
+
+# Update cmake to v3.13+, which is ahead of apt-get's version (3.10.2).
+ENV CMAKE_VERSION 3.13.5
+RUN apt-get update \
+ && mkdir ./cmake_install \
+ && cd cmake_install \
+ && wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION?}/cmake-${CMAKE_VERSION?}.tar.gz" \
+ && tar -xzvf "cmake-${CMAKE_VERSION?}.tar.gz" \
+ && cd "cmake-${CMAKE_VERSION?}/" \
+ && cmake . \
+ && make \
+ && make install
+
+RUN git clone https://github.com/google/swiftshader \
+ && cd swiftshader && git checkout "${SWIFTSHADER_COMMIT?}" && cd .. \
+ # Only build SwiftShader Vulkan ICD.
+ && cmake -S swiftshader/ -B build-swiftshader/ \
+ -GNinja \
+ -DSWIFTSHADER_BUILD_VULKAN=ON \
+ -DSWIFTSHADER_BUILD_EGL=OFF \
+ -DSWIFTSHADER_BUILD_GLESv2=OFF \
+ -DSWIFTSHADER_BUILD_GLES_CM=OFF \
+ -DSWIFTSHADER_BUILD_PVR=OFF \
+ -DSWIFTSHADER_BUILD_TESTS=OFF \
+ && cmake --build build-swiftshader/ \
+ --config Release \
+ --target vk_swiftshader \
+ # Copy the ICD JSON and .so to a known place.
+ && cp -rf build-swiftshader/Linux /swiftshader \
+ # Keep track of the commit we are using.
+ && echo "${SWIFTSHADER_COMMIT?}" > /swiftshader/git-commit \
+ # Clean up everything.
+ && rm -rf swiftshader build-swiftshader
+
+# Set VK_ICD_FILENAMES so Vulkan loader can find the SwiftShader ICD.
+ENV VK_ICD_FILENAMES /swiftshader/vk_swiftshader_icd.json
diff --git a/build_tools/docker/bazel_tensorflow/Dockerfile b/build_tools/docker/bazel_tensorflow/Dockerfile
index a117990..0c37354 100644
--- a/build_tools/docker/bazel_tensorflow/Dockerfile
+++ b/build_tools/docker/bazel_tensorflow/Dockerfile
@@ -24,9 +24,7 @@
# gcr.io/iree-oss/bazel-tensorflow
# Set up the image and working directory.
-FROM gcr.io/iree-oss/bazel
+FROM gcr.io/iree-oss/bazel-bindings
-# Install python3, tensorflow and numpy.
-RUN apt-get install -y python3 python3-dev python3-pip python3-setuptools \
- && python3 -m pip install --upgrade pip \
- && python3 -m pip install numpy tf-nightly
+# Install tensorflow.
+RUN python3 -m pip install tf-nightly
diff --git a/build_tools/docker/build_and_update_gcr.py b/build_tools/docker/build_and_update_gcr.py
index 90c7454..6de59a4 100755
--- a/build_tools/docker/build_and_update_gcr.py
+++ b/build_tools/docker/build_and_update_gcr.py
@@ -20,30 +20,37 @@
"""
import argparse
+import functools
import os
import subprocess
+import sys
IREE_GCR_URL = 'gcr.io/iree-oss/'
DOCKER_DIR = 'build_tools/docker/'
-IMAGES = [
- 'bazel',
- 'bazel-bindings',
- 'bazel-tensorflow',
- 'cmake',
- 'cmake-android',
- 'cmake-nvidia',
- 'rbe-toolchain',
-]
-IMAGES_HELP = [f'`{name}`' for name in IMAGES]
-IMAGES_HELP = f'{", ".join(IMAGES_HELP[:-1])} or {IMAGES_HELP[-1]}'
-
-# Map from image names to images that depend on them.
-IMAGES_TO_DEPENDENT_IMAGES = {
- 'bazel': ['bazel-bindings', 'bazel-tensorflow'],
- 'cmake': ['cmake-android', 'cmake-nvidia'],
+# Map from image names to images that they depend on.
+IMAGES_TO_DEPENDENCIES = {
+ 'bazel': [],
+ 'bazel-bindings': ['bazel'],
+ 'bazel-tensorflow': ['bazel-bindings'],
+ 'bazel-nvidia': ['bazel-tensorflow'],
+ 'bazel-swiftshader': ['bazel-tensorflow'],
+ 'cmake': [],
+ 'cmake-android': ['cmake'],
+ 'cmake-nvidia': ['cmake'],
+ 'cmake-vulkan': ['cmake'],
+ 'cmake-swiftshader': ['cmake-vulkan'],
+ 'rbe-toolchain': [],
}
+IMAGES_TO_DEPENDENT_IMAGES = {k: [] for k in IMAGES_TO_DEPENDENCIES.keys()}
+for image, dependencies in IMAGES_TO_DEPENDENCIES.items():
+ for dependency in dependencies:
+ IMAGES_TO_DEPENDENT_IMAGES[dependency].append(image)
+
+IMAGES_HELP = [f'`{name}`' for name in IMAGES_TO_DEPENDENCIES.keys()]
+IMAGES_HELP = f'{", ".join(IMAGES_HELP)} or `all`'
+
RBE_MESSAGE = """
Remember to update the `rbe_default` digest in the `WORKSPACE` file to reflect
the new digest for the container.
@@ -57,8 +64,10 @@
description="Build IREE's Docker images and optionally push them to GCR.")
parser.add_argument(
'--image',
+ dest='images',
type=str,
required=True,
+ action='append',
help=f'Name of the image to build: {IMAGES_HELP}.')
parser.add_argument(
'--tag',
@@ -73,34 +82,77 @@
help='Push the built images to GCR. Requires gcloud authorization.')
args = parser.parse_args()
- if args.image not in IMAGES:
- raise parser.error('Expected --image to be one of:\n'
- f' {IMAGES_HELP}\n'
- f'but got `{args.image}`.')
-
+ for image in args.images:
+ if image == 'all':
+ args.images = IMAGES_TO_DEPENDENCIES.keys()
+ elif image not in IMAGES_TO_DEPENDENCIES.keys():
+ raise parser.error('Expected --image to be one of:\n'
+ f' {IMAGES_HELP}\n'
+ f'but got `{image}`.')
return args
+def cmp_images_by_dependency(image1, image2):
+ if image2 in IMAGES_TO_DEPENDENT_IMAGES[image1]:
+ return -1
+ if image1 in IMAGES_TO_DEPENDENT_IMAGES[image2]:
+ return 1
+ return (image1 > image2) - (image1 < image2)
+
+
+def run_command(command):
+ print(f'Running: {" ".join(command)}')
+ process = subprocess.Popen(
+ command,
+ bufsize=1,
+ stderr=subprocess.STDOUT,
+ stdout=subprocess.PIPE,
+ text=True)
+ for line in process.stdout:
+ print(line, end='')
+
+ return process.poll()
+
+
+def check_command(command):
+ exit_code = run_command(command)
+ if exit_code != 0:
+ print(f'Command failed: {" ".join(command)}')
+ sys.exit(exit_code)
+
+
if __name__ == '__main__':
args = parse_arguments()
# Ensure the user has the correct authorization if they try to push to GCR.
if args.push:
- subprocess.check_output(['gcloud', 'auth', 'configure-docker'])
+ if run_command(['which', 'gcloud']) != 0:
+ print('gcloud not found.'
+ ' See https://cloud.google.com/sdk/install for installation.')
+ sys.exit(1)
+ check_command(['gcloud', 'auth', 'configure-docker'])
- # Check if any images depend on `args.image` and update them if they do.
- images_to_update = [args.image]
- if args.image in IMAGES_TO_DEPENDENT_IMAGES:
- images_to_update.extend(IMAGES_TO_DEPENDENT_IMAGES[args.image])
+ # Check if any images depend on `args.images` and update them if they do.
+ images_to_update_set = set()
+ to_check = list(args.images)
+ while to_check:
+ image = to_check.pop()
+ if image not in images_to_update_set:
+ images_to_update_set.add(image)
+ to_check.extend(IMAGES_TO_DEPENDENT_IMAGES[image])
+ # Topo sort by image dependency
+ images_to_update = sorted(
+ images_to_update_set, key=functools.cmp_to_key(cmp_images_by_dependency))
+
+ print(f'Also updating dependent images. Will update: {images_to_update}')
for image in images_to_update:
print(f'Updating image {image}')
image_url = os.path.join(IREE_GCR_URL, f'{image}:{args.tag}')
image_path = os.path.join(DOCKER_DIR, image.replace('-', '_'))
- subprocess.check_output(
- ['docker', 'build', '--tag', image_url, image_path])
+ check_command(['docker', 'build', '--tag', image_url, image_path])
if args.push:
- subprocess.check_output(['docker', 'push', image_url])
+ check_command(['docker', 'push', image_url])
if 'rbe-toolchain' in images_to_update:
print(RBE_MESSAGE)
diff --git a/build_tools/docker/cmake/Dockerfile b/build_tools/docker/cmake/Dockerfile
index 92cece8..bde1f4d 100644
--- a/build_tools/docker/cmake/Dockerfile
+++ b/build_tools/docker/cmake/Dockerfile
@@ -27,14 +27,25 @@
FROM ubuntu:18.04
WORKDIR /usr/src/iree/
-RUN apt-get update
-# TODO: Remove this if the `apt-get install` below works without it again.
-RUN apt update
+RUN apt-get update \
+ && apt-get install -y \
+ # git for updating IREE's submodules.
+ git \
+ # For later installations
+ wget \
+ # For building with ninja
+ ninja-build \
+ # For bootstrapping the cmake installation
+ cmake \
+ # core IREE dependencies.
+ clang \
+ libsdl2-dev \
+ libssl-dev
# Update cmake to v3.13+, which is ahead of apt-get's version (3.10.2).
# Install dependencies, including an old version of cmake to bootstrap.
ENV CMAKE_VERSION 3.13.5
-RUN apt-get install -y clang cmake libssl-dev wget \
+RUN apt-get update \
&& mkdir ./cmake_install \
&& cd cmake_install \
&& wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION?}/cmake-${CMAKE_VERSION?}.tar.gz" \
@@ -44,14 +55,12 @@
&& make \
&& make install
-# Install dependencies.
-RUN apt-get install -y \
- git \
- ninja-build \
- python3 \
- python3-pip \
- python3-setuptools \
- # Install dependencies for the python bindings tests.
+# Dependencies for the python bindings tests.
+RUN apt-get update \
+ && apt-get install -y \
+ python3 \
+ python3-pip \
+ python3-setuptools \
&& python3 -m pip install --upgrade pip \
&& python3 -m pip install numpy absl-py
diff --git a/build_tools/docker/cmake_nvidia/Dockerfile b/build_tools/docker/cmake_nvidia/Dockerfile
index 8679656..1fdeaee 100644
--- a/build_tools/docker/cmake_nvidia/Dockerfile
+++ b/build_tools/docker/cmake_nvidia/Dockerfile
@@ -34,19 +34,22 @@
# does not support Ubuntu 18.04.
# This allows to share configuration with base CMake, but it also means we need
# to MATCH the driver version between the host machine and the docker image.
+# TODO: use cmake-vulkan as the base.
FROM gcr.io/iree-oss/cmake
# Additionally, we need to install the Vulkan SDK and the NVIDIA Vulkan driver.
ARG VULKAN_SDK_VERSION=1.2.141
-# Disable apt-key parse waring.
-ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
+# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
+# thing is then feel free. The warning complains about parsing apt-key output,
+# which we're not even doing.
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
-# Disable prompt during keyboard configuration.
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - \
- && wget -qO /etc/apt/sources.list.d/lunarg-vulkan-$VULKAN_SDK_VERSION-bionic.list http://packages.lunarg.com/vulkan/$VULKAN_SDK_VERSION/lunarg-vulkan-$VULKAN_SDK_VERSION-bionic.list \
+RUN wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc \
+ | apt-key add - \
+ && wget -qO \
+ "/etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ "http://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION?}/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
&& apt-get update \
&& apt-get install -y vulkan-sdk nvidia-driver-440
diff --git a/build_tools/docker/cmake_swiftshader/Dockerfile b/build_tools/docker/cmake_swiftshader/Dockerfile
new file mode 100644
index 0000000..b92ce02
--- /dev/null
+++ b/build_tools/docker/cmake_swiftshader/Dockerfile
@@ -0,0 +1,60 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An image for building IREE using CMake and testing IREE with SwiftShader
+# Vulkan implementation.
+
+# Build using:
+# docker build --tag gcr.io/iree-oss/cmake-swiftshader \
+# build_tools/docker/cmake_swiftshader/
+
+# Run interactively using the following, where IREE_WORKDIR is the path to your
+# local dev environment:
+# docker run -it --rm --entrypoint bash \
+# --volume "${IREE_WORKDIR?}:/usr/src/iree/" \
+# --gpus all \
+# gcr.io/iree-oss/cmake-swiftshader
+
+# Set up the image and working directory by inheriting the base
+# CMake configuration.
+FROM gcr.io/iree-oss/cmake-vulkan
+
+ARG SWIFTSHADER_COMMIT=6a8a74986c357b0c6fa0dfd2b4b9230af8d39d1a
+
+# zlib is needed for compiling SwiftShader.
+RUN apt-get update && apt-get install -y zlib1g-dev
+
+RUN git clone https://github.com/google/swiftshader \
+ && cd swiftshader && git checkout "${SWIFTSHADER_COMMIT?}" && cd .. \
+ # Only build SwiftShader Vulkan ICD.
+ && cmake -S swiftshader/ -B build-swiftshader/ \
+ -GNinja \
+ -DSWIFTSHADER_BUILD_VULKAN=ON \
+ -DSWIFTSHADER_BUILD_EGL=OFF \
+ -DSWIFTSHADER_BUILD_GLESv2=OFF \
+ -DSWIFTSHADER_BUILD_GLES_CM=OFF \
+ -DSWIFTSHADER_BUILD_PVR=OFF \
+ -DSWIFTSHADER_BUILD_TESTS=OFF \
+ && cmake --build build-swiftshader/ \
+ --config Release \
+ --target vk_swiftshader \
+ # Copy the ICD JSON and .so to a known place.
+ && cp -rf build-swiftshader/Linux /swiftshader \
+ # Keep track of the commit we are using.
+ && echo "${SWIFTSHADER_COMMIT?}" > /swiftshader/git-commit \
+ # Clean up everything.
+ && rm -rf swiftshader build-swiftshader
+
+# Set VK_ICD_FILENAMES so Vulkan loader can find the SwiftShader ICD.
+ENV VK_ICD_FILENAMES /swiftshader/vk_swiftshader_icd.json
diff --git a/build_tools/docker/cmake_vulkan/Dockerfile b/build_tools/docker/cmake_vulkan/Dockerfile
new file mode 100644
index 0000000..f2cdee6
--- /dev/null
+++ b/build_tools/docker/cmake_vulkan/Dockerfile
@@ -0,0 +1,47 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An image with Vulkan SDK for building IREE using CMake.
+
+# Build using:
+# docker build --tag gcr.io/iree-oss/cmake-vulkan \
+# build_tools/docker/cmake_vulkan/
+
+# Run interactively using the following, where IREE_WORKDIR is the path to your
+# local dev environment:
+# docker run -it --rm --entrypoint bash \
+# --volume "${IREE_WORKDIR?}:/usr/src/iree/" \
+# --gpus all \
+# gcr.io/iree-oss/cmake-vulkan
+
+# Set up the image and working directory by inheriting the base
+# CMake configuration.
+FROM gcr.io/iree-oss/cmake
+
+# Additionally, we need to install the Vulkan SDK.
+
+ARG VULKAN_SDK_VERSION=1.2.141
+
+# Disable apt-key parse warning. If someone knows how to do whatever the "proper"
+# thing is then feel free. The warning complains about parsing apt-key output,
+# which we're not even doing.
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
+
+RUN wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc \
+ | apt-key add - \
+ && wget -qO \
+ "/etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ "http://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION?}/lunarg-vulkan-${VULKAN_SDK_VERSION?}-bionic.list" \
+ && apt-get update \
+ && apt-get install -y vulkan-sdk
diff --git a/build_tools/docker/rbe_toolchain/Dockerfile b/build_tools/docker/rbe_toolchain/Dockerfile
index c2df299..878aca3 100755
--- a/build_tools/docker/rbe_toolchain/Dockerfile
+++ b/build_tools/docker/rbe_toolchain/Dockerfile
@@ -21,23 +21,29 @@
FROM gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:1a8ed713f40267bb51fe17de012fa631a20c52df818ccb317aaed2ee068dfc61
-RUN apt-get update
-RUN apt-get install -y python3 python3-pip
-RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install numpy
+RUN apt-get update \
+ && apt-get install -y \
+ python3 \
+ python3-pip \
+ && python3 -m pip install --upgrade pip \
+ && python3 -m pip install numpy
-# Install dependencies for python3.6-dev
-RUN apt-get install -y software-properties-common
+# Dependency for python3.6-dev. Needs to be installed separately from the above
+# for... some reason
+RUN apt-get update && apt-get install -y software-properties-common
+
# apt-add-repository requires a version of python with the softwareproperties
# module. To use this command, we:
# 1. remove the symlink to python3 from python3.6 and symlink it to python3.5
# 2. run apt-add-repository with python3 = python3.5
# 3. resymlink python3 to /opt/python3.6/bin/python3.6
# See https://github.com/google/iree/issues/1966 for more information.
-RUN rm /usr/bin/python3 && ln -s /usr/bin/python3.5 /usr/bin/python3
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN rm /usr/bin/python3 && ln -s /opt/python3.6/bin/python3.6 /usr/bin/python3
+RUN rm /usr/bin/python3 \
+ && ln -s /usr/bin/python3.5 /usr/bin/python3 \
+ && add-apt-repository ppa:deadsnakes/ppa \
+ && rm /usr/bin/python3 \
+ && ln -s /opt/python3.6/bin/python3.6 /usr/bin/python3
# Install python3.6-dev
-RUN apt-get update
-RUN apt-get install -y python3.6 python3.6-dev
+RUN apt-get update \
+ && apt-get install -y python3.6 python3.6-dev
diff --git a/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build.sh
new file mode 100755
index 0000000..32c061f
--- /dev/null
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# For use within an IREE bazel-swiftshader docker image on a Kokoro VM.
+# Log some information about the environment, initialize the submodules and then
+# run the bazel integrations tests.
+
+set -e
+set -x
+
+# Print the UTC time when set -x is on
+export PS4='[$(date -u "+%T %Z")] '
+
+# Check these exist and print the versions for later debugging
+bazel --version
+"$CXX" --version
+"$CC" --version
+"$PYTHON_BIN" -V
+# TODO(#1875): Make PYTHON_BIN also control the runtime version
+python3 -V
+
+# Print Vulkan related information: SDK version and GPU ICD version
+vulkaninfo 2>/dev/null | grep "Vulkan Instance" || echo "Vulkan Instance not found!"
+vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties" || echo "VkPhysicalDeviceProperties not found!"
+
+# Print SwiftShader git commit
+cat /swiftshader/git-commit
+
+echo "Initializing submodules"
+./scripts/git/submodule_versions.py init
+
+echo "Building and testing with bazel"
+./build_tools/bazel/build_tensorflow.sh
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 62%
copy from kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build_kokoro.sh
index e2a9bd3..63c5893
--- a/kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build_kokoro.sh
@@ -14,30 +14,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Build and test IREE's bindings within the gcr.io/iree-oss/bazel-bindings
+# Build and test IREE's integrations within gcr.io/iree-oss/bazel-swiftshader
# image using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the bazel-bindings image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
- gcr.io/iree-oss/bazel-bindings:prod \
- kokoro/gcp_ubuntu/bazel/bindings/build.sh
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
+ --env IREE_VULKAN_DISABLE=0 \
+ gcr.io/iree-oss/bazel-swiftshader:prod \
+ build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/common.cfg
similarity index 72%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/common.cfg
index e4cc270..609936e 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/common.cfg
@@ -1,6 +1,6 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Copyright 2020 Google LLC
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/google.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/google.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/main.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-swiftshader/integrations/presubmit.cfg
diff --git a/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build.sh
new file mode 100755
index 0000000..51b491d
--- /dev/null
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# For use within an IREE bazel-nvidia docker image on a Kokoro VM.
+# Log some information about the environment, initialize the submodules and then
+# run the bazel integrations tests.
+
+set -e
+set -x
+
+# Print the UTC time when set -x is on
+export PS4='[$(date -u "+%T %Z")] '
+
+# Check these exist and print the versions for later debugging
+bazel --version
+"$CXX" --version
+"$CC" --version
+"$PYTHON_BIN" -V
+# TODO(#1875): Make PYTHON_BIN also control the runtime version
+python3 -V
+
+# Print Vulkan related information: SDK version and GPU ICD version
+vulkaninfo 2>/dev/null | grep "Vulkan Instance" || echo "Vulkan Instance not found!"
+vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties" || echo "VkPhysicalDeviceProperties not found!"
+
+echo "Initializing submodules"
+./scripts/git/submodule_versions.py init
+
+echo "Building and testing with bazel"
+./build_tools/bazel/build_tensorflow.sh
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 88%
rename from kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build_kokoro.sh
index 1716266..0712a66
--- a/kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build_kokoro.sh
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Build and test IREE's integrations within the gcr.io/iree-oss/bazel-tensorflow
+# Build and test IREE's integrations within the gcr.io/iree-oss/bazel-nvidia
# image using Kokoro.
set -e
@@ -32,8 +32,10 @@
--volume "${WORKDIR?}:${WORKDIR?}" \
--workdir="${WORKDIR?}" \
--rm \
- gcr.io/iree-oss/bazel-tensorflow:prod \
- kokoro/gcp_ubuntu/bazel/integrations/build.sh
+ --env IREE_VULKAN_DISABLE=0 \
+ --gpus all \
+ gcr.io/iree-oss/bazel-nvidia:prod \
+ build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/common.cfg
similarity index 72%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/common.cfg
index e4cc270..eb09ca3 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/common.cfg
@@ -1,6 +1,6 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Copyright 2020 Google LLC
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/google.cfg
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/google.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/main.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86-turing/integrations/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/build.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build.sh
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/bindings/build.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build.sh
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 73%
rename from kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build_kokoro.sh
index e2a9bd3..24cbabf
--- a/kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build_kokoro.sh
@@ -16,28 +16,28 @@
# Build and test IREE's bindings within the gcr.io/iree-oss/bazel-bindings
# image using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the bazel-bindings image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
gcr.io/iree-oss/bazel-bindings:prod \
- kokoro/gcp_ubuntu/bazel/bindings/build.sh
+ build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/common.cfg
similarity index 72%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/common.cfg
index e4cc270..8a49430 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/common.cfg
@@ -1,6 +1,6 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Copyright 2020 Google LLC
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that run the bindings build with bazel
+# on linux.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/google.cfg
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/google.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/main.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/bindings/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/core/build.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build.sh
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/core/build.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build.sh
diff --git a/kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 72%
rename from kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build_kokoro.sh
index 031eab8..da07cdd
--- a/kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build_kokoro.sh
@@ -16,28 +16,28 @@
# Build and test IREE's core within the gcr.io/iree-oss/bazel image using
# Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the bazel image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
gcr.io/iree-oss/bazel:prod \
- kokoro/gcp_ubuntu/bazel/core/build.sh
+ build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/common.cfg
old mode 100644
new mode 100755
similarity index 72%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/common.cfg
index e4cc270..3a22d10
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/common.cfg
@@ -1,6 +1,6 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Copyright 2020 Google LLC
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that run the core build with bazel on
+# linux.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/core/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/google.cfg
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/core/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/google.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/core/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/core/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/main.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/core/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/core/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/build.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build.sh
similarity index 100%
rename from kokoro/gcp_ubuntu/bazel/integrations/build.sh
rename to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build.sh
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 73%
copy from kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build_kokoro.sh
index 1716266..c35d897
--- a/kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build_kokoro.sh
@@ -16,28 +16,28 @@
# Build and test IREE's integrations within the gcr.io/iree-oss/bazel-tensorflow
# image using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the bazel-tensorflow image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
gcr.io/iree-oss/bazel-tensorflow:prod \
- kokoro/gcp_ubuntu/bazel/integrations/build.sh
+ build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/common.cfg
similarity index 72%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/common.cfg
index e4cc270..eb31e55 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/common.cfg
@@ -1,6 +1,6 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Copyright 2020 Google LLC
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that run the integrations build with
+# bazel on linux.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/google.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/google.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/main.cfg
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/bazel/integrations/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 73%
rename from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh
index f912073..329e226
--- a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh
@@ -16,28 +16,28 @@
# Cross-compile the project towards Android arm64-v8a with the
# gcr.io/iree-oss/cmake-android image using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR=${KOKORO_ARTIFACTS_DIR?}/github/iree
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the cmake-android image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
gcr.io/iree-oss/cmake-android:prod \
- kokoro/gcp_ubuntu/cmake/android/build.sh arm64-v8a
+ build_tools/kokoro/gcp_ubuntu/cmake/android/build.sh arm64-v8a
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
similarity index 75%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
index e4cc270..1376e08 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that cross-compile IREE towards
+# Android arm64-v8a using CMake.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/google.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/google.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/main.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/android/build.sh
similarity index 100%
rename from kokoro/gcp_ubuntu/cmake/android/build.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/android/build.sh
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh
similarity index 67%
copy from kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh
index a56c847..ddd5a74 100755
--- a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh
@@ -29,9 +29,20 @@
"$CXX" --version
python3 --version
+# For some reason the environment variable set in the base `cmake` image cannot
+# reach the child `cmake-swiftshader` image. Given this environment variable
+# is just a temporary solution, duplicate it here instead of spending all
+# the effort trying to figure out why.
+# TODO(#2645): remove this once we have a better solution for AOT linker
+# discovery.
+export IREE_LLVMAOT_LINKER_PATH=/usr/bin/ld
+
# Print Vulkan related information: SDK version and GPU ICD version
-vulkaninfo 2>/dev/null | grep "Vulkan Instance"
-vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties"
+vulkaninfo 2>/dev/null | grep "Vulkan Instance" || echo "Vulkan Instance not found!"
+vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties" || echo "VkPhysicalDeviceProperties not found!"
+
+# Print SwiftShader git commit
+cat /swiftshader/git-commit
echo "Initializing submodules"
./scripts/git/submodule_versions.py init
diff --git a/kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh
old mode 100644
new mode 100755
similarity index 62%
copy from kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh
index 031eab8..82426ec
--- a/kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh
@@ -14,30 +14,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Build and test IREE's core within the gcr.io/iree-oss/bazel image using
-# Kokoro.
+# Build and test the project within the gcr.io/iree-oss/cmake-swiftshader image
+# using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the bazel image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
- gcr.io/iree-oss/bazel:prod \
- kokoro/gcp_ubuntu/bazel/core/build.sh
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
+ --env IREE_VULKAN_DISABLE=0 \
+ gcr.io/iree-oss/cmake-swiftshader:prod \
+ build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/common.cfg
similarity index 76%
rename from kokoro/gcp_ubuntu/cmake/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/common.cfg
index e4cc270..9f4a82c 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/common.cfg
@@ -14,6 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/continuous.cfg
similarity index 100%
rename from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/continuous.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/google.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/google.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/main.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
similarity index 87%
rename from kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
index a56c847..f44aa82 100755
--- a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
@@ -30,8 +30,8 @@
python3 --version
# Print Vulkan related information: SDK version and GPU ICD version
-vulkaninfo 2>/dev/null | grep "Vulkan Instance"
-vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties"
+vulkaninfo 2>/dev/null | grep "Vulkan Instance" || echo "Vulkan Instance not found!"
+vulkaninfo 2>/dev/null | grep -A7 "VkPhysicalDeviceProperties" || echo "VkPhysicalDeviceProperties not found!"
echo "Initializing submodules"
./scripts/git/submodule_versions.py init
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh
similarity index 64%
rename from kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh
index 051c0d0..0b2364a 100755
--- a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh
@@ -14,32 +14,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Build and test the project within the gcr.io/iree-oss/cmake using Kokoro.
+# Build and test the project within the gcr.io/iree-oss/cmake image using
+# Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR=${KOKORO_ARTIFACTS_DIR?}/github/iree
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the cmake image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
--env IREE_VULKAN_DISABLE=0 \
--gpus all \
gcr.io/iree-oss/cmake-nvidia:prod \
- kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
+ build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-# TODO: enable this after making it work
-#sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
-#ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
+ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
similarity index 76%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
index e4cc270..bdb9163 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
@@ -14,6 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that run cmake on linux.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/google.cfg
similarity index 100%
rename from kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/google.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/main.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/linux/x86-turing/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/presubmit.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build.sh
similarity index 100%
rename from kokoro/gcp_ubuntu/cmake/build.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build.sh
diff --git a/kokoro/gcp_ubuntu/cmake/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build_kokoro.sh
similarity index 72%
rename from kokoro/gcp_ubuntu/cmake/build_kokoro.sh
rename to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build_kokoro.sh
index 8c35e60..ee459ff 100755
--- a/kokoro/gcp_ubuntu/cmake/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build_kokoro.sh
@@ -15,28 +15,28 @@
# limitations under the License.
# Build and test the project within the gcr.io/iree-oss/cmake using Kokoro.
+# Requires the environment variables KOKORO_ROOT and KOKORO_ARTIFACTS_DIR, which
+# are set by Kokoro.
-set -e
set -x
+set -e
+set -o pipefail
# Print the UTC time when set -x is on
export PS4='[$(date -u "+%T %Z")] '
-# Kokoro checks out the repository here.
-WORKDIR=${KOKORO_ARTIFACTS_DIR?}/github/iree
+source "${KOKORO_ARTIFACTS_DIR?}/github/iree/build_tools/kokoro/gcp_ubuntu/docker_common.sh"
-# Mount the checked out repository, make that the working directory and run the
-# tests in the cmake image.
-docker run \
- --volume "${WORKDIR?}:${WORKDIR?}" \
- --workdir="${WORKDIR?}" \
- --rm \
+# Sets DOCKER_RUN_ARGS
+docker_setup
+
+docker run "${DOCKER_RUN_ARGS[@]?}" \
gcr.io/iree-oss/cmake:prod \
- kokoro/gcp_ubuntu/cmake/build.sh
+ build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build.sh
# Kokoro will rsync this entire directory back to the executor orchestrating the
# build which takes forever and is totally useless.
-sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
+rm -rf "${KOKORO_ARTIFACTS_DIR?}"/*
# Print out artifacts dir contents after deleting them as a coherence check.
ls -1a "${KOKORO_ARTIFACTS_DIR?}/"
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/common.cfg
similarity index 76%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/common.cfg
index e4cc270..49e6865 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/common.cfg
@@ -14,6 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+# Common configuration for Kokoro builds that run cmake on linux.
+
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/google.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/google.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/main.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/main.cfg
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/presubmit.cfg
similarity index 100%
copy from kokoro/gcp_ubuntu/cmake/android/arm64-v8a/continuous.cfg
copy to build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/presubmit.cfg
diff --git a/build_tools/kokoro/gcp_ubuntu/docker_common.sh b/build_tools/kokoro/gcp_ubuntu/docker_common.sh
new file mode 100644
index 0000000..d0b15dd
--- /dev/null
+++ b/build_tools/kokoro/gcp_ubuntu/docker_common.sh
@@ -0,0 +1,99 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Functions for setting up Docker containers to run on Kokoro
+
+# Sets up files and environment to enable running all our Kokoro docker scripts.
+# In particular, does some shenanigans to enable running with the current user.
+# Some of this setup is only strictly necessary for Bazel, but it doesn't hurt
+# for anything else.
+# Requires that KOKORO_ROOT and KOKORO_ARTIFACTS_DIR have been set
+# Sets the environment variable DOCKER_RUN_ARGS to be used by subsequent
+# `docker run` invocations.
+function docker_setup() {
+ # Make the source repository available and launch containers in that
+ # directory.
+ local workdir="${KOKORO_ARTIFACTS_DIR?}/github/iree"
+ DOCKER_RUN_ARGS=(
+ --volume="${workdir?}:${workdir?}"
+ --workdir="${workdir?}"
+ )
+
+ # Delete the container after the run is complete.
+ DOCKER_RUN_ARGS+=(--rm)
+
+
+ # Run as the current user and group. If only it were this simple...
+ DOCKER_RUN_ARGS+=(--user="$(id -u):$(id -g)")
+
+
+ # The Docker container doesn't know about the users and groups of the host
+ # system. We have to tell it. This is just a mapping of IDs to names though.
+ # The thing that really matters is the IDs, so the key thing is that Docker
+ # writes files as the same ID as the current user, which we set above, but
+ # without the group and passwd file, lots of things get upset because they
+ # don't recognize the current user ID (e.g. `whoami` fails). Bazel in
+ # particular looks for a home directory and is not happy when it can't find
+ # one.
+ # So we make the container share the host mapping, which guarantees that the
+ # current user is mapped. If there was any user or group in the container
+ # that we cared about, this wouldn't necessarily work because the host and
+ # container don't necessarily map the ID to the same user. Luckily though,
+ # we don't.
+ # We don't just mount the real /etc/passwd and /etc/group because Google
+ # Linux workstations do some interesting stuff with user/group permissions
+ # such that they don't contain the information about normal users and we
+ # want these scripts to be runnable locally for debugging.
+ # Instead we dump the results of `getent` to some fake files.
+ local fake_etc_dir="${KOKORO_ROOT?}/fake_etc"
+ mkdir -p "${fake_etc_dir?}"
+
+ local fake_group="${fake_etc_dir?}/group"
+ local fake_passwd="${fake_etc_dir?}/passwd"
+
+ getent group > "${fake_group?}"
+ getent passwd > "${fake_passwd?}"
+
+ DOCKER_RUN_ARGS+=(
+ --volume="${fake_group?}:/etc/group:ro"
+ --volume="${fake_passwd?}:/etc/passwd:ro"
+ )
+
+
+ # Bazel stores its cache in the user home directory by default. It's
+ # possible to override this, but that would require changing our Bazel
+ # startup options, which means polluting all our scripts and making them not
+ # runnable locally. Instead, we give it a special home directory to write
+ # into. We don't just mount the user home directory (or some subset thereof)
+ # for two reasons:
+ # 1. We probably don't want Docker to just write into the user's home
+ # directory when running locally.
+ # 2. When running with Kokoro, we mount a local scratch SSD to KOKORO_ROOT
+ # whereas the home directory is on the persistent SSD boot disk. It
+ # turns out that makes a huge difference in performance for Bazel
+ # running with local execution (not with RBE) because it is IO bound at
+ # 64 cores.
+ local fake_home_dir="${KOKORO_ROOT?}/fake_home"
+ mkdir -p "${fake_home_dir}"
+
+ DOCKER_RUN_ARGS+=(
+ --volume="${fake_home_dir?}:${HOME?}"
+ )
+
+ # Make gcloud credentials available. This isn't necessary when running in
+ # GCE but enables using this script locally with RBE.
+ DOCKER_RUN_ARGS+=(
+ --volume="${HOME?}/.config/gcloud:${HOME?}/.config/gcloud:ro"
+ )
+}
diff --git a/build_tools/kokoro/gcp_ubuntu/simulate_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/simulate_kokoro.sh
new file mode 100755
index 0000000..38c6782
--- /dev/null
+++ b/build_tools/kokoro/gcp_ubuntu/simulate_kokoro.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Simulates the behavior of Kokoro on a local machine.
+# Usage:
+# ./kokoro/gcp_ubuntu/simulate_kokoro.sh build_tools/kokoro/gcp_ubuntu/bazel/core/build.sh
+#
+# Just does the part of the Kokoro setup that we care about and invokes the
+# given build script.
+# An optional second parameter can be used to specify a different repo to clone
+# from. Especially useful for cloning the current git repo.
+# ./kokoro/gcp_ubuntu/simulate_kokoro.sh build_tools/kokoro/gcp_ubuntu/bazel/core/build.sh "$PWD/.git"
+
+set -x
+set -e
+set -o pipefail
+
+RELATIVE_KOKORO_BUILD_SCRIPT="${1?}"
+REPO_TO_CLONE="${2:-git@github.com:google/iree.git}"
+
+# Set up the temporary Kokoro directories
+export KOKORO_ROOT="$(mktemp --directory --tmpdir kokoro-root-XXXXXX)"
+mkdir -p "${KOKORO_ROOT?}/src/github"
+export KOKORO_ARTIFACTS_DIR="${KOKORO_ROOT?}/src"
+cd "${KOKORO_ARTIFACTS_DIR?}/github"
+
+# Clone the repo
+git clone "${REPO_TO_CLONE?}"
+
+# The build script is assumed to be relative to the iree repo root.
+KOKORO_BUILD_SCRIPT="${KOKORO_ARTIFACTS_DIR?}/github/iree/${RELATIVE_KOKORO_BUILD_SCRIPT?}"
+chmod +x "${KOKORO_BUILD_SCRIPT?}"
+
+# This is where Kokoro starts its execution.
+cd "${KOKORO_ARTIFACTS_DIR?}"
+
+# Run the actual script.
+"${KOKORO_BUILD_SCRIPT?}"
+
+# Clean up after ourselves.
+rm -rf "${KOKORO_ROOT?}"
diff --git a/build_tools/manylinux_py_setup.py b/build_tools/manylinux_py_setup.py
new file mode 100755
index 0000000..abdd11d
--- /dev/null
+++ b/build_tools/manylinux_py_setup.py
@@ -0,0 +1,86 @@
+#!/opt/python/cp38-cp38/bin/python3
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generates CMake arguments to build all manylinux python versions.
+
+manylinux containers have all python version linked under /opt/python.
+This script scrapes them to get configuration, install deps, etc.
+
+Usage:
+ Install dependencies:
+ manylinux_py_setup.py deps
+ Get CMake arguments to build (typically via $() expansion):
+ manylinux_py_setup.py args
+"""
+
+import os
+from pathlib import Path
+import subprocess
+import sys
+import sysconfig
+
+
+def _get_python_exes():
+ PYTHON_PARENT_PATH = Path("/opt/python")
+ return PYTHON_PARENT_PATH.glob("*/bin/python")
+
+
+def install_deps():
+ for python_exe in _get_python_exes():
+ args = [
+ str(python_exe),
+ "-m",
+ "pip",
+ "install",
+ "absl-py",
+ "numpy",
+ ]
+ print("EXEC:", " ".join(args))
+ subprocess.check_call(args)
+
+
+def dump_current(identifier):
+ print("-DIREE_MULTIPY_{}_EXECUTABLE='{}'".format(identifier, sys.executable))
+ print("-DIREE_MULTIPY_{}_INCLUDE_DIRS='{}'".format(
+ identifier, sysconfig.get_config_var("INCLUDEPY")))
+ # TODO: Print LIBRARIES for Windows and OSX
+ print("-DIREE_MULTIPY_{}_EXTENSION='{}'".format(
+ identifier, sysconfig.get_config_var("EXT_SUFFIX")))
+
+
+def dump_all():
+ versions_ids = []
+ for python_exe in _get_python_exes():
+ identifier = python_exe.parent.parent.name
+ versions_ids.append(identifier)
+ # Invoke ourselves with a different interpreter/args to dump config.
+ subprocess.check_call(
+ [str(python_exe), __file__, "_current_args", identifier])
+ print("-DIREE_MULTIPY_VERSIONS='{}'".format(";".join(versions_ids)))
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print("SYNTAX: mainlinux_py_setup.py {deps|args}")
+ sys.exit(1)
+ command = sys.argv[1]
+ if command == "args":
+ dump_all()
+ elif command == "_current_args":
+ dump_current(sys.argv[2])
+ elif command == "deps":
+ install_deps()
+ else:
+ print("Unexpected command")
+ sys.exit(1)
diff --git a/build_tools/third_party/sdl2/BUILD.overlay b/build_tools/third_party/sdl2/BUILD.overlay
index 951160a..ba663f0 100644
--- a/build_tools/third_party/sdl2/BUILD.overlay
+++ b/build_tools/third_party/sdl2/BUILD.overlay
@@ -26,8 +26,15 @@
"src/render/direct3d*/**",
"src/render/SDL_d3d*",
"src/haptic/windows/**",
+ "src/locale/windows/**",
+ "src/locale/winrt/**",
+ # Ignore Android and Emscripten locales
+ "src/locale/android/**",
+ "src/locale/emscripten/**",
# Remove support for QNX
"src/video/qnx/**",
+ # No dbus -> don't try to compile this file
+ "src/core/linux/SDL_fcitx.c",
]
sdl_sources = select({
@@ -42,6 +49,9 @@
# Ignore Linux/Unix
"src/core/linux/**",
"src/core/unix/**",
+ # Ignore Android and Emscripten locales
+ "src/locale/android/**",
+ "src/locale/emscripten/**",
# Ignore thread (exclude for thread/windows)
"src/thread/**",
# Remove support for QNX
@@ -86,6 +96,7 @@
"include/SDL_keyboard.h",
"include/SDL_keycode.h",
"include/SDL_loadso.h",
+ "include/SDL_locale.h",
"include/SDL_log.h",
"include/SDL_main.h",
"include/SDL_messagebox.h",
diff --git a/build_tools/third_party/sdl2/SDL_config_windows.h b/build_tools/third_party/sdl2/SDL_config_windows.h
index 460fb3b..7626de6 100644
--- a/build_tools/third_party/sdl2/SDL_config_windows.h
+++ b/build_tools/third_party/sdl2/SDL_config_windows.h
@@ -227,6 +227,9 @@
/* Enable filesystem support */
#define SDL_FILESYSTEM_WINDOWS 1
+/* Disable sensor support */
+#define SDL_SENSOR_DISABLED 1
+
/* Enable assembly routines (Win64 doesn't have inline asm) */
#ifndef _WIN64
#define SDL_ASSEMBLY_ROUTINES 1
diff --git a/build_tools/third_party/swiftshader/build_vk_swiftshader.sh b/build_tools/third_party/swiftshader/build_vk_swiftshader.sh
old mode 100644
new mode 100755
diff --git a/colab/README.md b/colab/README.md
index d35df2a..b0bfe52 100644
--- a/colab/README.md
+++ b/colab/README.md
@@ -1,7 +1,7 @@
# Google Colaboratory (Colab) Notebooks
To run these notebooks with a local runtime, refer to the
-[Using Colab docs](../docs/using_colab.md).
+[Using Colab docs](../docs/using_iree/using_colab.md).
Hosted/remote runtimes are not yet supported.
diff --git a/docs/design_docs/codegen_passes.md b/docs/design_docs/codegen_passes.md
new file mode 100644
index 0000000..83e37fc
--- /dev/null
+++ b/docs/design_docs/codegen_passes.md
@@ -0,0 +1,640 @@
+# IREE CPU/GPU Code Generation Pipeline
+
+This document is intended to provide an overview of the codegen pipeline within
+IREE used to generate CPU/GPU code. It intends to give an overview of the main
+passes used, the objective of the pass, the current implementation, and what it
+is expected to achieve in the long term.
+
+Note that while the code generation pipeline supports dynamic shapes, this work
+is very preliminary. The description of this is not covered here.
+
+## Input to the codegen pipeline
+
+The input to the code generation pipeline is the module within the
+`hal.executable.target` operation. Functions within this module that do __not__
+have `Visibility::Private` are the *entry point* functions of the dispatch
+region. These are the functions that are *invoked* by the IREE runtime. In
+addition, each dispatch region also contains a `hal.interface` operation that
+describes the ABI to use for the dispatch region. Two examples of the input to
+the code generation pipeline are shown below. In both of these, a single
+dispatch function contains a sequence of MHLO operations that the dispatch
+region creation has grouped into a single region. Ideally the grouped operations
+are fused into a single kernel.
+
+```mlir
+hal.executable.target "vulkan*" {
+ module attributes {spv.target_env = ...} {
+ func @main_ex_dispatch() {
+ %c0 = constant 0 : index
+ %0 = hal.interface.load.tensor @legacy_io::@arg0,
+ offset = %c0 : tensor<4x5xf32>
+ %1 = hal.interface.load.tensor @legacy_io::@arg1,
+ offset = %c0 : tensor<5x10xf32>
+ %2 = "mhlo.dot"(%0, %1) {precision_config = ["DEFAULT", "DEFAULT"]} :
+ (tensor<4x5xf32>, tensor<5x10xf32>) -> tensor<4x10xf32>
+ hal.interface.store.tensor %2, @legacy_io::@ret0,
+ offset = %c0 : tensor<4x10xf32>
+ return
+ }
+ hal.interface @legacy_io attributes {sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0,
+ type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=0, binding=1,
+ type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=2,
+ type="StorageBuffer", access="Write|Discard"
+ }
+ }
+}
+```
+
+<a name="snippet1"></a> Snippet 1 : Dispatch region with matrix-matrix multiply
+operation.
+
+```mlir
+hal.executable.target "vulkan*" {
+ module attributes {spv.target_env = ...} {
+ func @main_ex_dispatch() {
+ %c0 = constant 0 : index
+ %0 = hal.interface.load.tensor @legacy_io::@arg0,
+ offset = %c0 : tensor<10x5xf32>
+ %1 = hal.interface.load.tensor @legacy_io::@arg1,
+ offset = %c0 : tensor<10x5xf32>
+ %2 = hal.interface.load.tensor @legacy_io::@arg2,
+ offset = %c0 : tensor<10x5xf32>
+ %3 = "mhlo.add"(%0, %1) :
+ (tensor<10x5xf32>, tensor<10x5xf32>) -> tensor<10x5xf32>
+ %4 = "mhlo.multiply"(%3, %2) :
+ (tensor<10x5xf32>, tensor<10x5xf32>) -> tensor<10x5xf32>
+ hal.interface.store.tensor %4, @legacy_io::@ret0,
+ offset = %c0 : tensor<10x5xf32>
+ return
+ }
+ hal.interface @legacy_io attributes {sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0,
+ type="StorageBuffer", access="Read"
+ hal.interface.binding @arg1, set=0, binding=1,
+ type="StorageBuffer", access="Read"
+ hal.interface.binding @arg2, set=0, binding=2,
+ type="StorageBuffer", access="Read"
+ hal.interface.binding @ret0, set=0, binding=3,
+ type="StorageBuffer", access="Write|Discard"
+ }
+ }
+}
+```
+
+<a name="snippet2"></a> Snippet 2 : Dispatch region with element-wise
+operations.
+
+__Roadmap Note__: The current implementation might not actually fuse the
+operations grouped into a dispatch region into a single kernel. It is possible
+to end up with multiple kernels per dispatch region. Over time we plan to
+address this by using fusion at different levels (see below).
+
+The inputs to the dispatch region are materialized within the entry point
+function using the `hal.interface.load.tensor` operation, This operation returns
+a `tensor` view of the buffer used to store the inputs. Similarly the result of
+the dispatch region are *written* out using the `hal.interface.store.tensor`
+operation.
+
+The main constraint that the code generation operates under is that it should
+not require additional (temporary) buffers to execute the operations grouped
+together within a dispatch region. The rationale behind this constraint is that
+buffer allocation/synchronization in IREE happens at the granularity of dispatch
+regions, allowing the scheduler to make better decision about where to insert
+appropriate synchronizations.
+
+The IR after all the passes used in the lowering from MHLO to SPIR-V for the
+above two examples can be found here ([matrix-matrix multiply op][DotAfterAll],
+[elementwise ops][PwAfterAll]). Below is a description of the major passes used.
+
+## Conversion from MHLO dialect to Linalg on buffers
+
+The code generation pipeline heavily relies on use of
+[Structured Operations][LinalgRationale], specifically the
+[Linalg Dialect][LinalgDialect]. Both, the Linalg operations on `tensor`s and on
+`memref`s are central to the progressive lowering approach followed here. The
+first part of the code generation pipeline is to convert the MHLO operations on
+`tensor`s to Linalg operation on `memref`s. This part of the pipeline is common
+to both CPU and GPU code generation.
+
+The steps involved in this conversion is shown below. Each of the arrows
+represents a pass in the pipeline:
+
+
+
+The next sections describe each of these passes in more detail.
+
+### MHLO to Linalg on tensors
+
+The first step is to convert MHLO operations to Linalg on tensors. This is done
+using the [HLOToLinalgPass][HLOToLinalgPass] from Tensorflow. An example of the
+conversion is shown below, where each of the `mhlo.add` and `mhlo.multiply`
+operations are converted to `linalg.generic` operations on tensors.
+
+```mlir
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+%3 = linalg.generic
+ {args_in = 2 : i64, args_out = 1 : i64,
+ indexing_maps = [#map0, #map0, #map0],
+ iterator_types = ["parallel", "parallel"]} %0, %1 {
+ ^bb0(%arg0: f32, %arg1: f32): // no predecessors
+ %5 = addf %arg0, %arg1 : f32
+ linalg.yield %5 : f32
+ } : tensor<10x5xf32>, tensor<10x5xf32> -> tensor<10x5xf32>
+%4 = linalg.generic
+ {args_in = 2 : i64, args_out = 1 : i64,
+ indexing_maps = [#map0, #map0, #map0],
+ iterator_types = ["parallel", "parallel"]} %3, %2 {
+ ^bb0(%arg0: f32, %arg1: f32): // no predecessors
+ %5 = mulf %arg0, %arg1 : f32
+ linalg.yield %5 : f32
+ }: tensor<10x5xf32>, tensor<10x5xf32> -> tensor<10x5xf32>
+```
+
+<a name="snippet3"></a> Snippet 3 : MHLO to Linalg conversion for
+[element-wise operations](#snippet2)
+
+At the time of writing the representation of Linalg on `tensor`s does not model
+reduction iterator types completely. Specifically, the reduction in Linalg is
+modeled using read-modify-write approach, i.e. each iteration of the reduction
+loop reads the value stored in the output, adds its contribution, and writes
+back to the same location. This means the output has to be *initialized* to the
+null element of the reduction operator (i.e. 0 if the reduction is done using
+addition). This works for operations on buffers. Since tensors are SSA values
+they cannot be updated in-place. As a result, the reduction semantics does not
+map as well to `tensor`s. For now it is treated as a convention that when the
+Linalg operation is converted to use `memref`s it has to be initialized
+appropriately before performing the reduction. Due to this, the conversion from
+MHLO op to Linalg op is only done for operations which do not need a *reduction*
+iterator type in the converted Linalg op. Consequently, only element-wise
+operations, broadcast operations and data movement operations (like copy and
+transpose) are converted to Linalg operations at this stage.
+
+__Roadmap note__: One long term solution for the above is to have operations on
+tensors that have *reduction* iterator type to take an additional argument that
+contains the initial value of the result tensor. When the operation is converted
+to use `memref`s, the buffer for the initial value operand can be reused for the
+result. The details involved have not been fully worked out yet.
+
+### Fusion of Linalg on tensor operations
+
+The Linalg on `tensor` operations generated at the previous step are fused using
+the [LinalgFusionOfTensorOps][LinalgFusionOfTensorOps] from MLIR. Since
+`tensor`s are SSA values, fusion at this stage can be done without using alias
+analysis or dependence analysis based on reads and writes. Instead the use-def
+chains for the `tensor` values can be used to implement producer-consumer
+fusion. This stage fuses most elementwise operations, broadcast operations and
+data movement operations. An example of the fused op is shown below.
+
+```mlir
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+%3 = linalg.generic
+ {args_in = 3 : i64, args_out = 1 : i64,
+ indexing_maps = [#map0, #map0, #map0, #map0],
+ iterator_types = ["parallel", "parallel"]} %0, %1, %2 {
+ ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): // no predecessors
+ %4 = addf %arg0, %arg1 : f32
+ %5 = mulf %4, %arg2 : f32
+ linalg.yield %5 : f32
+ }: tensor<?x5xf32>, tensor<?x5xf32>, tensor<?x5xf32> -> tensor<?x5xf32>
+```
+
+<a name="snippet4"></a> Snippet 4: Fusion of Linalg operation on tensors for
+element-wise operations shown in [Snippet 3](#snippet3)
+
+### Conversion of Linalg on tensors to Linalg on buffers
+
+Post fusion all the operation on `tensor`s are converted to analogous operations
+on `memref`s. In general, this requires a buffer allocation pass. In IREE,
+buffer allocation happens at the granularity of dispatch region, and as
+mentioned [earlier](#input-to-the-codegen-pipeline), the dispatch region is not
+expected to use any additional temporary buffers. So instead of having another
+buffer allocation pass within the code generation pipeline, a simpler approach
+is used within IREE:
+
+- For each `hal.interface.store.tensor` an `iree.placeholder` operation is
+ created. The latter uses the same `hal.interface.binding` as the former, but
+ returns a `memref` view of the output of the dispatch region instead of a
+ `tensor` view. This `iree.placeholder` operation is added to start of the
+ entry point function.
+
+- A map is constructed that for a given `tensor` records the `memref` value to
+ use during the conversion. In this map the `tensor` value used in the
+ `hal.interface.store.tensor` is mapped to the `memref` value returned by the
+ created `iree.placeholder` operation.
+
+- The Dialect Conversion framework is used to implement a set of patterns that
+ convert from operations on `tensor`s to operation on `memref`s,
+
+ - A `hal.interface.load.tensor`, is replaced with an `iree.placeholder` to
+ get the `memref` view of the input to the dispatch region.
+ - All Linalg operation on `tensor`s (expected to be just `linalg.generic`
+ or `linalg.indexed_generic` operations) are converted to the
+ corresponding operation on `memref`s. Instead of returning a `tensor`
+ value the converted operation takes an additional `memref` operand as
+ argument. This `memref` is where the result of the operation is
+ populated. Current implementation looks for the `memref` to use from the
+ map constructed previously. If there is no `memref` associated with the
+ result `tensor` the conversion fails.
+ - At this stage, any `mhlo` operation not converted to a Linalg operation
+ are directly converted to a Linalg operation on buffers. This is done
+ for operations that when converted to Linalg have a *reduction* iterator
+ type. Some examples of ops converted this way are
+
+ - `mhlo.dot`
+ - `mhlo.reduce`
+ - `mhlo.conv`
+ - `mhlo.reduce_window`.
+
+ Since the specification of the Linalg operations require the output
+ `memref` to be initialized appropriately, a `linalg.fill` operation is
+ used to achieve this.
+
+__Roadmap Note__ : Right now the code-generation pipeline relies on fusion of
+operations on tensor level. In the near future, we want to be able to fuse
+operations like `linalg.matmul` and `linalg.conv` with consumers/producers that
+are element-wise operations using the
+[fusion of Linalg operation on `memref`s][LinalgFusionOnBuffers].
+
+At this stage of the compilation all operations must have been converted to
+Linalg operations on buffers. Shown below are the IR at the end of this stage
+for the two examples in Snippets 1 and 2.
+
+```mlir
+func @main_ex_dispatch() {
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<4x10xf32>
+ %c0 = constant 0 : index
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<4x5xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<5x10xf32>
+ %cst = constant 0.000000e+00 : f32
+ linalg.matmul(%1, %2, %0) :
+ memref<4x5xf32>, memref<5x10xf32>, memref<4x10xf32>
+ return
+}
+```
+
+<a name="snippet5"></a> Snippet 5 : Matrix-matrix multiply after conversion to
+Linalg operation on `memref`s.
+
+```mlir
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+func @main_ex_dispatch() {
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<10x5xf32>
+ %c0 = constant 0 : index
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<10x5xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<10x5xf32>
+ %3 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg2} : memref<10x5xf32>
+ linalg.generic
+ {args_in = 3 : i64, args_out = 1 : i64,
+ indexing_maps = [#map0, #map0, #map0],
+ iterator_types = ["parallel", "parallel"]} %1, %2, %3, %0 {
+ ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32): // no predecessors
+ %4 = addf %arg0, %arg1 : f32
+ %5 = mulf %4, %arg2 : f32
+ linalg.yield %5 : f32
+ }: memref<10x5xf32>, memref<10x5xf32>, memref<10x5xf32>, memref<10x5xf32>
+ return
+}
+```
+
+<a name="snippet6"></a> Snippet 6 : Elementwise operations after conversion to
+Linalg operation on `memref`s
+
+The rest of the code-generation differs on whether the compilation is for CPU
+(using LLVM) or for GPU (using SPIR-V).
+
+## Conversion from Linalg on buffers to SPIR-V dialect
+
+The following sections describe the progressive lowering of Linalg operation on
+buffers to SPIR-V dialect. Once lowered to the SPIR-V dialect, it can be
+serialized into a SPIR-V binary using the
+[serialization mechanism provided by the SPIR-V dialect][SpirvSerialization].
+The steps involved in the lowering are described below, with each of the arrows
+representing a pass.
+
+
+
+These passes are described below in more detail.
+
+### Tiling and fusion on buffer operations
+
+The GPU hardware typically provides multiple-levels of compute hierarchy, namely
+*workgroup* level, *subgroup* level and *workitem* level. These map to blocks,
+warps and threads, respectively, in CUDA terminology. Tiling is a way to map the
+computations to each level of the compute hierarchy. For example 3-D tiling a
+`linalg.matmul` operation decomposes the computation into several tiled
+matrix-matrix multiplies.
+[Tiling transformation in Linalg dialect][LinalgTiling] generates the
+outer-loops that iterate over tiled `linalg.matmul` operations. These outer
+loops can be mapped to different workgroups, if they are parallel. The tiled
+`linalg.matmul` operation can be further tiled to map to subgroups. Finally, the
+tiled operation can be lowered to loops with individual iterations mapped to
+workitems. The [LinalgTileAndFusePass][LinalgTileAndFuse] uses the Linalg Tiling
+patterns ([defined here][LinalgTilingPatterns]) to tile operations like
+`linalg.matmul`, `linalg.conv` and `linalg.*_pooling`. The result of tiling the
+code in Snippet 5 is shown below. As expected there are 2-parallel loops that
+iterate over tiles of the original iteration space (i.e. inter-tile loops) and
+can be distributed to workgroups.
+
+```mlir
+func @main_ex_dispatch_0()
+ attributes {
+ spv.entry_point_abi = {local_size = dense<[8, 8, 1]> : vector<3xi32>}} {
+ %cst = constant 0.000000e+00 : f32
+ %c0 = constant 0 : index
+ %c4 = constant 4 : index
+ %c10 = constant 10 : index
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<4x10xf32>
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<4x5xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<5x10xf32>
+ linalg.fill(%0, %cst) : memref<4x10xf32>, f32
+ scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c4, %c10) step (%c8, %c8) {
+ scf.for %arg2 = %c0 to %c5 step %c4 {
+ ...
+ %5 = subview %1[%arg0, %arg2]...
+ ...
+ %8 = subview %2[%arg2, %arg1]...
+ ...
+ %11 = subview %0[%arg0, %arg1]..
+ linalg.matmul {__internal_linalg_transform__ = "workgroup"} %5, %8, %11...
+ }
+ scf.yield
+ }
+ return
+}
+```
+
+<a name="snippet7"></a> Snippet 7 : `linalg.matmul` after tiling.
+
+#### Tile Size and Workgroup Size
+
+When operations that are to be tiled exist within the dispatch function (like
+`linalg.matmul` or `linalg.conv`), this pass also decides the 1. Tile size to be
+used for the tiling. 1. The workgroup size to be used.
+
+The tile size and workgroup size are closely linked since the code within the
+tiled loops are to be collectively executed by the entire workgroup. In other
+words, all workitems in the workgroup collaborate to execute the tiled
+`linalg.matmul`.
+
+__Roadmap Note__ : Currently the tile sizes used in this pass are hard-wired.
+Not much effort has been put into finding ideal tile size for each operation on
+different hardware. The value used is meant to be a baseline to test
+functionality, with performance considerations addressed over time.
+
+#### Markers
+
+Downstream passes have to handle tiled Linalg operations and untiled Linalg
+operation that might exist in the same function in different ways. For example,
+while the former are to be executed collectively by workitems within a
+workgroup, the latter have to be executed by all workitems across workgroups.
+One way to distinguish these two operations is to use the marker mechanism in
+Linalg ([LinalgMarker][LinalgTilingPatterns]). This is a `StrAttr` whose value
+can be used to encode the scope of the operation. For example, in Snippet 7
+above, the tiled `linalg.matmul` operation has a marker `workgroup` to indicate
+that this operation needs to be executed by a workgroup in a collective manner.
+At this time, the code-generation pipeline uses only the `workgroup` marker.
+
+__Roadmap Note__ : Markers are meant to be short-lived, ideally set and consumed
+within the same pass. In the current pipeline the lifetime spans passes to allow
+lowering to different hierarchies. The separate passes that implement the
+lowering from Linalg to SPIR-V can be combined into a single pass, relying on
+the A -> B -> C translation mechanism of the Dialect Conversion framework to implement
+the progressive lowering. In interest of separation of concerns and for better
+debuggability these passes are kept separate at the cost of having lifetimes of
+markers span passes.
+
+#### Promoting subviews to use workgroup local memory and use of synchronizations
+
+`Workgroup` memory (or `shared memory` in CUDA terminology) can be used to
+prefetch the inputs to the tiled operation. For example in the matrix-matrix
+multiply case, the same data row (column) of the LHS (RHS) matrix is read by
+multiple workitems. Prefetching the data into `Workgroup` memory can reduce the
+number of loads to `StorageClass` memory by an order of magnitude. This
+transformation can be achieved by using the
+[`Linalg Promotion`][LinalgPromotionPatterns] which modifies the `subview`s that
+are the operands to the tiled Linalg operation to use a new `memref` object. The
+size of this `memref` is computed from the size of the `subview`. This `memref`
+object is later lowered to use `Workgroup` memory Storage Class. The snippet
+below shows this transformation when applied to `linalg.matmul` (along with
+tiling). The newly created `memref` objects are annotated with the memory space
+`3` to indicate that they are to be lowered to use `Workgroup` memory. The copy
+of data from the original `memref` into the new `memref`, as well as the
+necessary synchronization constructs are generated as well. Note the memory
+space annotation used here is consistent with the
+[address space annotations used in NVVM][NVVMAddressSpace].
+
+```mlir
+func @matmul_tile()
+ attributes {
+ spv.entry_point_abi = {local_size = dense<[8, 8, 1]> : vector<3xi32>}} {
+ %c96 = constant 96 : index
+ %c4 = constant 4 : index
+ %c8 = constant 8 : index
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<96x96xf32>
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<96x96xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<96x96xf32>
+ scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c96, %c96) step (%c8, %c8) {
+ scf.for %arg2 = %c0 to %c96 step %c4 {
+ ...
+ %5 = subview %0[%arg0, %arg2]...
+ ...
+ %8 = subview %1[%arg2, %arg1]...
+ ...
+ %11 = subview %2[%arg0, %arg1]...
+ %12 = alloc(%c8, %c4) : memref<?x?xf32, 3>
+ %13 = subview %12[%c0, %c0]...
+ %14 = alloc(%c4, %c8) : memref<?x?xf32, 3>
+ %15 = subview %14[%c0, %c0]...
+ linalg.copy(%5, %13) {__internal_linalg_transform__ = "workgroup"}
+ : memref<?x?xf32, #map2>, memref<?x?xf32, #map2, 3>
+ spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
+ linalg.copy(%8, %15) {__internal_linalg_transform__ = "workgroup"}
+ : memref<?x?xf32, #map2>, memref<?x?xf32, #map2, 3>
+ spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
+ linalg.matmul {__internal_linalg_transform__ = "workgroup"} %13, %15, %11...
+ spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
+ dealloc %12 : memref<?x?xf32, 3>
+ dealloc %14 : memref<?x?xf32, 3>
+ }
+ scf.yield
+ }
+ return
+}
+```
+
+<a name="snippet8"></a> Snippet 8: `linalg.matmul` after tiling and promotion of
+operand subviews to use `Workgroup` memory.
+
+### Distributing to workgroups and workitems
+
+After tiling, the operations within the dispatch functions are either
+`scf.parallel` operations or Linalg operations.
+
+- The outer `scf.parallel` operations represent parallel loops that are to be
+ distributed across workgroups. The distribution here assumes that the number
+ of workgroups along each dimension is equal to the number of iterations of
+ the `scf.parallel` operation.
+
+- Linalg operations that are not tiled, and are therefore __not within__ `scf`
+ operations, are lowered to loops. The resulting outer `scf.parallel`
+ operations are collapsed to have a single induction variable. This loop is
+ then distributed across workitems using their `GlobalInvocationId`, (which
+ is same as `blockIdx * blockDim + threadIdx` in CUDA terminology).
+
+- Linalg operations that are tiled, and are therefore __within__ `scf`
+ operations, are lowered to loops and the iterations of the `scf.parallel`
+ operations are mapped to workitems using their `LocalInvocationId` (which is
+ same as `threadIdx` in CUDA terminology). Note that these operations are
+ tagged with the `workgroup` marker which makes it easy to disambiguate from
+ the case where Linalg operations are outside of `scf` operations. Here too,
+ the distribution assumes that the workgroup size is greater than or equal to
+ the number of iterations of the partitioned loop.
+
+These transformations are applied by the [`ConvertToGPUPass`][ConvertToGPU].
+Below is the result of applying this pass to Snippet 7. The outer `scf.parallel`
+loop is distributed across workgroups. The tiled `linalg.matmul` operation is
+lowered to loops, and the outer `scf.parallel` operations generated during this
+lowering are distributed across workitems within the workgroup.
+
+```mlir
+func @main_ex_dispatch_0_dispatch_1()
+ attributes {
+ spv.entry_point_abi = {local_size = dense<[8, 8, 1]> : vector<3xi32>}} {
+ %c5 = constant 5 : index
+ %c8 = constant 8 : index
+ %c4 = constant 4 : index
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<4x10xf32>
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<4x5xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<5x10xf32>
+ %3 = "gpu.block_id"() {dimension = "x"} : () -> index
+ %4 = muli %3, %c8 : index
+ scf.for %arg0 = %c0 to %c5 step %c4 {
+ ...
+ %9 = subview %1[0, %arg0]
+ ...
+ %14 = subview %2[%arg0, %4]
+ %15 = subview %0[0, %4]
+ %16 = "gpu.thread_id"() {dimension = "x"} : () -> index
+ %17 = "gpu.thread_id"() {dimension = "y"} : () -> index
+ %18 = cmpi "slt", %17, %c4 : index
+ %19 = cmpi "slt", %16, %13 : index
+ %20 = and %18, %19 : i1
+ scf.if %20 {
+ scf.for %arg1 = %c0 to %8 step %c1 {
+ %21 = load %9[%17, %arg1] : memref<4x?xf32, #map0>
+ %22 = load %14[%arg1, %16] : memref<?x?xf32, #map1>
+ %23 = load %15[%17, %16] : memref<4x?xf32, #map1>
+ %24 = mulf %21, %22 : f32
+ %25 = addf %23, %24 : f32
+ store %25, %15[%17, %16] : memref<4x?xf32, #map1>
+ }
+ }
+ }
+ return
+}
+```
+
+<a name="snippet9"></a> Snippet 9: `linalg.matmul` after distributing parallel
+inter-tile loops to workgroups and intra-tile loops to workitems.
+
+[Snippet 6](#snippet6) shows the fused element-wise operations represented using
+a `linalg.generic` operation. This operation is not tiled in the
+`LinalgTileAndFusePass`. So the `ConvertToGPUPass` lowers this operation to
+`scf.parallel` loops, which are collapsed into a `scf.parallel` operation with a
+single induction variable. This loop is then distributed across workitems using
+the `GlobalInvocationId`. The resulting IR is shown below.
+
+```mlir
+func @main_ex_dispatch_0()
+ attributes {
+ spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} {
+ %c50 = constant 50 : index
+ %c5 = constant 5 : index
+ %0 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@ret0} : memref<10x5xf32>
+ %1 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg0} : memref<10x5xf32>
+ %2 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg1} : memref<10x5xf32>
+ %3 = iree.placeholder for "interface buffer"
+ {binding = @legacy_io::@arg2} : memref<10x5xf32>
+ %4 = "gpu.block_id"() {dimension = "x"} : () -> index
+ %5 = "gpu.block_dim"() {dimension = "x"} : () -> index
+ %6 = "gpu.thread_id"() {dimension = "x"} : () -> index
+ %7 = muli %4, %5 : index
+ %8 = addi %7, %6 : index
+ %9 = cmpi "slt", %8, %c50 : index
+ scf.if %9 {
+ %10 = divi_signed %8, %c5 : index
+ %11 = remi_signed %8, %c5 : index
+ %12 = load %1[%10, %11] : memref<10x5xf32>
+ %13 = load %2[%10, %11] : memref<10x5xf32>
+ %14 = load %3[%10, %11] : memref<10x5xf32>
+ %15 = addf %12, %13 : f32
+ %16 = mulf %15, %14 : f32
+ store %16, %0[%10, %11] : memref<10x5xf32>
+ }
+ return
+}
+```
+
+<a name="snippet10"></a> Snippet 10: Distributing the iterations for pointwise
+operations for GPU execution.
+
+### Lowering to SPIR-V dialect
+
+The last step is to take the result of the previous pass and lower it to the
+SPIR-V dialect. Since the SPIR-V dialect is *closed*, i.e. it has a separate type
+system, it's best to lower all the operations to SPIR-V in one step. This is done
+by applying all the patterns that lower all the different IR constructs into
+SPIR-V within the [`ConvertToSPIRVPass`][ConvertToSPIRV]. These are
+
+- [GPU dialect to SPIR-V conversion][GPUToSPIRV].
+- [SCF dialect to SPIR-V conversion][SCFToSPIRV].
+- [Standard dialect to SPIR-V conversion][StandardToSPIRV].
+- Patterns that lower the `iree.placeholder` instruction into SPIR-V.
+
+Once applied the resulting IR is in SPIR-V dialect that can be serialized to a
+SPIR-V binary.
+
+[ConvertToGPU]: https://github.com/google/iree/blob/main/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp
+[ConvertToSPIRV]: https://github.com/google/iree/blob/main/iree/compiler/Conversion/LinalgToSPIRV/ConvertToSPIRVPass.cpp
+[DotAfterAll]: https://gist.github.com/MaheshRavishankar/9e2d406296f469515c4a79bf1e7eef44
+[GPUToSPIRV]: https://github.com/llvm/llvm-project/blob/master/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h
+[HLOToLinalgPass]: https://github.com/tensorflow/tensorflow/blob/75c40f6bff2faa3d90a375dfa4025b2e6e2d7a3d/tensorflow/compiler/mlir/xla/transforms/passes.h#L67
+[LinalgDialect]: https://mlir.llvm.org/docs/Dialects/Linalg/
+[LinalgFusionOnBuffers]: https://github.com/llvm/llvm-project/blob/ef868a848e6def288d2df7a1b3ebe09463afc8d0/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h#L86
+[LinalgFusionOfTensorOps]: https://github.com/llvm/llvm-project/blob/80cb25cbd555f9634836b766c86aead435b60eaa/mlir/include/mlir/Dialect/Linalg/Passes.td#L30
+[LinalgPromotionPatterns]: https://github.com/llvm/llvm-project/blob/303a7f7a26e2aae1cb85f49dccbc0b5d14e0b2e0/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h#L358
+[LinalgRationale]: https://mlir.llvm.org/docs/Rationale/RationaleLinalgDialect/
+[LinalgTileAndFuse]: https://github.com/google/iree/blob/main/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndFusePass.cpp
+[LinalgTiling]: https://mlir.llvm.org/docs/Dialects/Linalg/#set-of-key-transformationsa-namekey_transformationsa
+[LinalgTilingPatterns]: https://github.com/llvm/llvm-project/blob/master/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+[NVVMAddressSpace]: https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#address-space
+[PwAfterAll]: https://gist.github.com/MaheshRavishankar/02cdd22f7c99e568f933244b5a679510
+[SCFToSPIRV]: https://github.com/llvm/llvm-project/blob/master/mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h
+[SpirvSerialization]: https://mlir.llvm.org/docs/Dialects/SPIR-V/#serialization-and-deserialization
+[StandardToSPIRV]: https://github.com/llvm/llvm-project/blob/master/mlir/include/mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.h
diff --git a/docs/dynamic_shapes.md b/docs/design_docs/dynamic_shapes.md
similarity index 100%
rename from docs/dynamic_shapes.md
rename to docs/design_docs/dynamic_shapes.md
diff --git a/docs/function_abi.md b/docs/design_docs/function_abi.md
similarity index 99%
rename from docs/function_abi.md
rename to docs/design_docs/function_abi.md
index 797f2f6..71d91bb 100644
--- a/docs/function_abi.md
+++ b/docs/design_docs/function_abi.md
@@ -1,4 +1,4 @@
-# Function signatures
+# Function Signatures
A key job of the IREE compiler and runtime is capturing function call semantics
from the originating system and providing mechanisms so that invocations can be
diff --git a/docs/design_docs/hlo_to_linalg.png b/docs/design_docs/hlo_to_linalg.png
new file mode 100755
index 0000000..469ed26
--- /dev/null
+++ b/docs/design_docs/hlo_to_linalg.png
Binary files differ
diff --git a/docs/design_docs/linalg_to_spirv.png b/docs/design_docs/linalg_to_spirv.png
new file mode 100755
index 0000000..fd6aee7
--- /dev/null
+++ b/docs/design_docs/linalg_to_spirv.png
Binary files differ
diff --git a/docs/simple_ir_walkthrough.md b/docs/design_docs/simple_ir_walkthrough.md
similarity index 99%
rename from docs/simple_ir_walkthrough.md
rename to docs/design_docs/simple_ir_walkthrough.md
index 1dfcb49..68f51a8 100644
--- a/docs/simple_ir_walkthrough.md
+++ b/docs/design_docs/simple_ir_walkthrough.md
@@ -1,5 +1,7 @@
# Simple IR Walkthrough
+Note that this doc is quite outdated. We expect to update it soon.
+
## Overview
This walks through the process of lowering TensorFlow python to an IREE module,
diff --git a/docs/roadmap_design.md b/docs/design_roadmap.md
similarity index 100%
rename from docs/roadmap_design.md
rename to docs/design_roadmap.md
diff --git a/docs/benchmarking.md b/docs/developing_iree/benchmarking.md
similarity index 100%
rename from docs/benchmarking.md
rename to docs/developing_iree/benchmarking.md
diff --git a/docs/contributor_tips.md b/docs/developing_iree/contributor_tips.md
similarity index 89%
rename from docs/contributor_tips.md
rename to docs/developing_iree/contributor_tips.md
index 05fb52e..03ddffc 100644
--- a/docs/contributor_tips.md
+++ b/docs/developing_iree/contributor_tips.md
@@ -2,11 +2,11 @@
This is an opinionated guide documenting workflows that some members of the team
have found useful. It is focused on meta-tooling, not on IREE code specifically
-(you will find the latter in the [Developer Overview](../developer_overview.md))
-It is certainly possible to use workflows other than these, but some common
-tasks, especially for maintainers will likely be made easier if you use these
-flows. It assumes a basic knowledge of `git` and GitHub and suggests some
-specific ways of using it.
+(you will find the latter in the [Developer Overview](developer_overview.md)) It
+is certainly possible to use workflows other than these, but some common tasks,
+especially for maintainers will likely be made easier if you use these flows. It
+assumes a basic knowledge of `git` and GitHub and suggests some specific ways of
+using it.
## Git Structure
diff --git a/docs/developer_overview.md b/docs/developing_iree/developer_overview.md
similarity index 89%
rename from docs/developer_overview.md
rename to docs/developing_iree/developer_overview.md
index 56fb5f8..ec900f5 100644
--- a/docs/developer_overview.md
+++ b/docs/developing_iree/developer_overview.md
@@ -150,7 +150,7 @@
and executes it as a series of
[googletest](https://github.com/google/googletest) tests. This is the test
runner for the IREE
-[check framework](https://github.com/google/iree/tree/main/docs/testing_guide.md#end-to-end-tests).
+[check framework](https://github.com/google/iree/tree/main/docs/developing_iree/testing_guide.md#end-to-end-tests).
```shell
$ bazel run iree/tools:iree-translate -- \
@@ -207,6 +207,28 @@
accept a number where 0, 1, 2, 3 stands for info, warning, error, and fatal
error respectively.
+#### Read inputs from a file
+
+All the IREE tools support reading input values from a file. This is quite
+useful for debugging. Use `-help` for each tool to see which flag to set. The
+inputs are expected to be newline-separated. Each input should be either a
+scalar or a buffer. Scalars should be in the format `type=value` and buffers
+should be in the format `[shape]xtype=[value]`. For example:
+
+```
+1x5xf32=1,-2,-3,4,-5
+1x5x3x1xf32=15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
+```
+
+#### `iree-flow-trace-dispatch-tensors`
+
+This flag will enable tracing inputs and outputs for each dispatch function. This
+makes it easier to narrow down test cases, since IREE breaks an ML workload into
+multiple dispatch functions. When the flag is on, IREE will insert trace points
+before and after each dispatch function. The first trace op is for inputs, and
+the second trace op is for outputs. There will be two events for one dispatch
+function.
+
### Useful Vulkan driver flags
For IREE's Vulkan runtime driver, there are a few useful
diff --git a/docs/repository_management.md b/docs/developing_iree/repository_management.md
similarity index 100%
rename from docs/repository_management.md
rename to docs/developing_iree/repository_management.md
diff --git a/docs/testing_guide.md b/docs/developing_iree/testing_guide.md
similarity index 98%
rename from docs/testing_guide.md
rename to docs/developing_iree/testing_guide.md
index 3be690d..5f57c9c 100644
--- a/docs/testing_guide.md
+++ b/docs/developing_iree/testing_guide.md
@@ -30,7 +30,7 @@
To use the Vulkan backend as test driver, you may need to select between a
Vulkan implementation from SwiftShader and multiple Vulkan-capable hardware
devices. This can be done via environment variables. See the
-[generic Vulkan setup](GetStarted/generic_vulkan_env_setup.md#useful-environment-variables)
+[generic Vulkan setup](get_started/generic_vulkan_env_setup.md#useful-environment-variables)
page for details regarding these variables.
For Bazel, you can persist the configuration in `user.bazelrc` to save typing.
diff --git a/docs/GetStarted/cmake_options_and_variables.md b/docs/get_started/cmake_options_and_variables.md
similarity index 91%
rename from docs/GetStarted/cmake_options_and_variables.md
rename to docs/get_started/cmake_options_and_variables.md
index 8e121fc..3f3dbdd 100644
--- a/docs/GetStarted/cmake_options_and_variables.md
+++ b/docs/get_started/cmake_options_and_variables.md
@@ -63,17 +63,21 @@
#### `IREE_HAL_DRIVERS_TO_BUILD`:STRING
-*This does not have any effect at the moment, but will be supported in the
-future!* Semicolon-separated list of HAL drivers to build, or `all` for building
-all HAL drivers. Case-insensitive. Defaults to `all`. Example:
+*Right now this only affects whether tests are enabled when compiling for
+Android; it will be fully supported in the future!*
+
+Semicolon-separated list of HAL drivers to build, or `all` for building all HAL
+drivers. Case-insensitive. Defaults to `all`. Example:
`-DIREE_HAL_DRIVERS_TO_BUILD="Vulkan;VMLA"`.
#### `IREE_TARGET_BACKENDS_TO_BUILD`:STRING
-*This does not have any effect at the moment, but will be supported in the
-future!* Semicolon-separated list of HAL drivers to build, or `all` for building
-all HAL drivers. Case-insensitive. Defaults to `all`. Example:
-`-DIREE_HAL_DRIVERS_TO_BUILD="Vulkan_SPIRV;VMLA"`.
+*Right now this only affects whether tests are enabled when compiling for
+Android; it will be fully supported in the future!*
+
+Semicolon-separated list of HAL drivers to build, or `all` for building all
+compiler target backends. Case-insensitive. Defaults to `all`. Example:
+`-DIREE_HAL_DRIVERS_TO_BUILD="Vulkan-SPIRV;VMLA"`.
#### `IREE_ENABLE_LLD`:BOOL
diff --git a/docs/GetStarted/generic_vulkan_env_setup.md b/docs/get_started/generic_vulkan_env_setup.md
similarity index 100%
rename from docs/GetStarted/generic_vulkan_env_setup.md
rename to docs/get_started/generic_vulkan_env_setup.md
diff --git a/docs/GetStarted/getting_started_android_cmake.md b/docs/get_started/getting_started_android_cmake.md
similarity index 100%
rename from docs/GetStarted/getting_started_android_cmake.md
rename to docs/get_started/getting_started_android_cmake.md
diff --git a/docs/GetStarted/getting_started_linux_bazel.md b/docs/get_started/getting_started_linux_bazel.md
similarity index 97%
rename from docs/GetStarted/getting_started_linux_bazel.md
rename to docs/get_started/getting_started_linux_bazel.md
index 8d4a9dc..7f8b688 100644
--- a/docs/GetStarted/getting_started_linux_bazel.md
+++ b/docs/get_started/getting_started_linux_bazel.md
@@ -123,7 +123,7 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md)
+ [Developer Overview](../developing_iree/developer_overview.md)
* To target GPUs using Vulkan, see
[Getting Started on Linux with Vulkan](getting_started_linux_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_linux_cmake.md b/docs/get_started/getting_started_linux_cmake.md
similarity index 97%
rename from docs/GetStarted/getting_started_linux_cmake.md
rename to docs/get_started/getting_started_linux_cmake.md
index 70da146..127a80c 100644
--- a/docs/GetStarted/getting_started_linux_cmake.md
+++ b/docs/get_started/getting_started_linux_cmake.md
@@ -110,7 +110,7 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md)
+ [Developer Overview](../developing_iree/developer_overview.md)
* To target GPUs using Vulkan, see
[Getting Started on Linux with Vulkan](getting_started_linux_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_linux_vulkan.md b/docs/get_started/getting_started_linux_vulkan.md
similarity index 100%
rename from docs/GetStarted/getting_started_linux_vulkan.md
rename to docs/get_started/getting_started_linux_vulkan.md
diff --git a/docs/GetStarted/getting_started_macos_bazel.md b/docs/get_started/getting_started_macos_bazel.md
similarity index 95%
rename from docs/GetStarted/getting_started_macos_bazel.md
rename to docs/get_started/getting_started_macos_bazel.md
index 2b285eb..3fc3dcb 100644
--- a/docs/GetStarted/getting_started_macos_bazel.md
+++ b/docs/get_started/getting_started_macos_bazel.md
@@ -126,8 +126,8 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md) <!-- TODO: Link to macOS
- versions of these guides once they are developed.
+ [Developer Overview](../developing_iree/developer_overview.md) <!-- TODO:
+ Link to macOS versions of these guides once they are developed.
* To target GPUs using Vulkan, see
[Getting Started on Linux with Vulkan](getting_started_linux_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_macos_cmake.md b/docs/get_started/getting_started_macos_cmake.md
similarity index 95%
rename from docs/GetStarted/getting_started_macos_cmake.md
rename to docs/get_started/getting_started_macos_cmake.md
index 51ef0ab..7b916cd 100644
--- a/docs/GetStarted/getting_started_macos_cmake.md
+++ b/docs/get_started/getting_started_macos_cmake.md
@@ -110,8 +110,8 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md) <!-- TODO: Link to macOS
- versions of these guides once they are developed.
+ [Developer Overview](../developing_iree/developer_overview.md) <!-- TODO:
+ Link to macOS versions of these guides once they are developed.
* To target GPUs using Vulkan, see
[Getting Started on Linux with Vulkan](getting_started_linux_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_python.md b/docs/get_started/getting_started_python.md
similarity index 100%
rename from docs/GetStarted/getting_started_python.md
rename to docs/get_started/getting_started_python.md
diff --git a/docs/GetStarted/getting_started_windows_bazel.md b/docs/get_started/getting_started_windows_bazel.md
similarity index 97%
rename from docs/GetStarted/getting_started_windows_bazel.md
rename to docs/get_started/getting_started_windows_bazel.md
index d3f01c3..8cf0f87 100644
--- a/docs/GetStarted/getting_started_windows_bazel.md
+++ b/docs/get_started/getting_started_windows_bazel.md
@@ -118,7 +118,7 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md)
+ [Developer Overview](../developing_iree/developer_overview.md)
* To target GPUs using Vulkan, see
[Getting Started on Windows with Vulkan](getting_started_windows_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_windows_cmake.md b/docs/get_started/getting_started_windows_cmake.md
similarity index 97%
rename from docs/GetStarted/getting_started_windows_cmake.md
rename to docs/get_started/getting_started_windows_cmake.md
index dcab418..da5218f 100644
--- a/docs/GetStarted/getting_started_windows_cmake.md
+++ b/docs/get_started/getting_started_windows_cmake.md
@@ -107,7 +107,7 @@
### Further Reading
* For an introduction to IREE's project structure and developer tools, see
- [Developer Overview](../developer_overview.md)
+ [Developer Overview](../developing_iree/developer_overview.md)
* To target GPUs using Vulkan, see
[Getting Started on Windows with Vulkan](getting_started_windows_vulkan.md)
* To use IREE's Python bindings, see
diff --git a/docs/GetStarted/getting_started_windows_vulkan.md b/docs/get_started/getting_started_windows_vulkan.md
similarity index 100%
rename from docs/GetStarted/getting_started_windows_vulkan.md
rename to docs/get_started/getting_started_windows_vulkan.md
diff --git a/docs/IREE-Architecture.svg b/docs/iree_architecture.svg
similarity index 100%
rename from docs/IREE-Architecture.svg
rename to docs/iree_architecture.svg
diff --git a/docs/roadmap.md b/docs/milestones.md
similarity index 94%
rename from docs/roadmap.md
rename to docs/milestones.md
index b979de7..322fa7e 100644
--- a/docs/roadmap.md
+++ b/docs/milestones.md
@@ -1,11 +1,11 @@
-# IREE Roadmap
+# IREE Milestones
## Design
Though many of the core dialects are now in place enough for correctness testing
a large majority of the features we are most excited to demonstrate are still
TODO and will be coming over the next few quarters. You can find a highlighted
-set of coming features in the [design roadmap](roadmap_design.md).
+set of coming features in the [design roadmap](design_roadmap.md).
## Spring/Summer 2020 Focus Areas
@@ -37,7 +37,7 @@
### HAL: Marl CPU Scheduling
We want to plug in [marl](https://github.com/google/marl) to provide
-[CPU-side work scheduling](roadmap_design.md#gpu-like-cpu-scheduling) that
+[CPU-side work scheduling](design_roadmap.md#gpu-like-cpu-scheduling) that
matches GPU semantics. This will enable improved CPU utilization and allow us to
verify the approach with benchmarks.
diff --git a/docs/mnist_example.md b/docs/mnist_example.md
deleted file mode 100644
index 93cd23a..0000000
--- a/docs/mnist_example.md
+++ /dev/null
@@ -1,254 +0,0 @@
-# MNIST IR Example
-
-This shows the MNIST MLP model as it is compiled from Keras, lowered to XLA HLO,
-and then lowered to an IREE module with SPIR-V. Several steps are omitted for
-brevity.
-
-## TensorFlow Keras Model
-
-```python
-def simple_mnist_model(input_shape):
- """Creates a simple (multi-layer perceptron) MNIST model."""
- model = tf.keras.models.Sequential()
- # Flatten to a 1d array (e.g. 28x28 -> 784)
- model.add(tf.keras.layers.Flatten(input_shape=input_shape))
- # Fully-connected neural layer with 128 neurons, RELU activation
- model.add(tf.keras.layers.Dense(128, activation='relu'))
- # Fully-connected neural layer returning probability scores for each class
- model.add(tf.keras.layers.Dense(10, activation='softmax'))
- return model
-```
-
-## XLA HLO
-
-**NOTE**: this uses placeholder weights to keep the page from being a few
-thousand lines of floats.
-
-```mlir
-module {
- func @main(%arg0: tensor<1x28x28x1xf32>) -> tuple<tensor<1x10xf32>>
- attributes {iree.module.export} {
- %cst = constant {name = "constant.9"} dense<0.5> : tensor<f32>
- %0 = "mhlo.broadcast_in_dim"(%cst) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
- %1 = "mhlo.copy"(%arg0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
- %2 = "mhlo.reshape"(%1) {name = "reshape.2"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
- %3 = "mhlo.reshape"(%2) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
- %cst_0 = constant {name = "constant.4"} dense<0.5> : tensor<784x128xf32>
- %4 = "mhlo.dot"(%3, %cst_0) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
- %cst_1 = constant {name = "constant.6"} dense<0.5> : tensor<128xf32>
- %5 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
- %6 = "mhlo.add"(%4, %5) {name = "add.8"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
- %7 = "mhlo.maximum"(%0, %6) {name = "maximum.11"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
- %cst_2 = constant {name = "constant.12"} dense<0.5> : tensor<128x10xf32>
- %8 = "mhlo.dot"(%7, %cst_2) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
- %cst_3 = constant {name = "constant.14"} dense<0.5> : tensor<10xf32>
- %9 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
- %10 = "mhlo.add"(%8, %9) {name = "add.16"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
- %cst_4 = constant {name = "constant.17"} dense<0xFF800000> : tensor<f32>
- %11 = "mhlo.reduce"(%10, %cst_4) ( {
- ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
- %20 = "mhlo.maximum"(%arg1, %arg2) {name = "maximum.21"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
- "mhlo.return"(%20) : (tensor<f32>) -> ()
- }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
- %12 = "mhlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
- %13 = "mhlo.subtract"(%10, %12) {name = "subtract.24"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
- %14 = "mhlo.exponential"(%13) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
- %cst_5 = constant {name = "constant.27"} dense<0.5> : tensor<f32>
- %15 = "mhlo.reduce"(%14, %cst_5) ( {
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>): // no predecessors
- %21 = "mhlo.add"(%arg3, %arg4) {name = "add.31"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
- "mhlo.return"(%21) : (tensor<f32>) -> ()
- }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
- %16 = "mhlo.broadcast_in_dim"(%15) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
- %17 = "mhlo.divide"(%14, %16) {name = "divide.35"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
- %18 = "mhlo.reshape"(%17) {name = "reshape.36"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
- %19 = "mhlo.tuple"(%18) {name = "tuple.37"} : (tensor<1x10xf32>) -> tuple<tensor<1x10xf32>>
- return %19 : tuple<tensor<1x10xf32>>
- }
-}
-```
-
-## IREE IR (pre-backend lowering)
-
-Here's the lowered, outlined, and compiler-annotated version of the above in the
-IREE sequencer dialect.
-
-```mlir
-module {
- iree.multi_arch_executable @main_ex_dispatch_0[0]() {
- iree.executable[0](Unspecified) {
- module {
- func @main_entry_dispatch_0(%arg0: memref<1x28x28x1xf32>, %arg1: memref<1x784xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[784, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x28x28x1xf32>) : tensor<1x28x28x1xf32>
- %1 = "mhlo.copy"(%0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
- %2 = "mhlo.reshape"(%1) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
- iree.store_output(%2 : tensor<1x784xf32>, %arg1 : memref<1x784xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_1[1]() {
- iree.executable[1](Unspecified) {
- module {
- func @main_entry_dispatch_1(%arg0: memref<1x784xf32>, %arg1: memref<784x128xf32>, %arg2: memref<1x128xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[128, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x784xf32>) : tensor<1x784xf32>
- %1 = iree.load_input(%arg1 : memref<784x128xf32>) : tensor<784x128xf32>
- %2 = "mhlo.dot"(%0, %1) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
- iree.store_output(%2 : tensor<1x128xf32>, %arg2 : memref<1x128xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_2[2]() {
- iree.executable[2](Unspecified) {
- module {
- func @main_entry_dispatch_2(%arg0: memref<1x128xf32>, %arg1: memref<1x128xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[128, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x128xf32>) : tensor<1x128xf32>
- %cst = constant dense<5.000000e-01> : tensor<128xf32>
- %cst_0 = constant dense<5.000000e-01> : tensor<f32>
- %1 = "mhlo.broadcast_in_dim"(%cst_0) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
- %2 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
- %3 = addf %0, %2 : tensor<1x128xf32>
- %4 = mhlo.maximum %1, %3 {name = "maximum.11"} : tensor<1x128xf32>
- iree.store_output(%4 : tensor<1x128xf32>, %arg1 : memref<1x128xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_3[3]() {
- iree.executable[3](Unspecified) {
- module {
- func @main_entry_dispatch_3(%arg0: memref<1x128xf32>, %arg1: memref<128x10xf32>, %arg2: memref<1x10xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x128xf32>) : tensor<1x128xf32>
- %1 = iree.load_input(%arg1 : memref<128x10xf32>) : tensor<128x10xf32>
- %2 = "mhlo.dot"(%0, %1) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
- iree.store_output(%2 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_4[4]() {
- iree.executable[4](Unspecified) {
- module {
- func @main_entry_dispatch_4(%arg0: memref<1x10xf32>, %arg1: memref<1x10xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
- %cst = constant dense<5.000000e-01> : tensor<10xf32>
- %1 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
- %2 = addf %0, %1 : tensor<1x10xf32>
- iree.store_output(%2 : tensor<1x10xf32>, %arg1 : memref<1x10xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_5[5]() {
- iree.executable[5](Unspecified) {
- module {
- func @main_entry_dispatch_5(%arg0: memref<1x10xf32>, %arg1: memref<1xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<1> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
- %cst = constant dense<0xFF800000> : tensor<f32>
- %1 = "mhlo.reduce"(%0, %cst) ( {
- ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>): // no predecessors
- %2 = mhlo.maximum %arg2, %arg3 {name = "maximum.21"} : tensor<f32>
- "mhlo.return"(%2) : (tensor<f32>) -> ()
- }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
- iree.store_output(%1 : tensor<1xf32>, %arg1 : memref<1xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_6[6]() {
- iree.executable[6](Unspecified) {
- module {
- func @main_entry_dispatch_6(%arg0: memref<1x10xf32>, %arg1: memref<1xf32>, %arg2: memref<1x10xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
- %1 = iree.load_input(%arg1 : memref<1xf32>) : tensor<1xf32>
- %2 = "mhlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
- %3 = subf %0, %2 : tensor<1x10xf32>
- %4 = "mhlo.exponential"(%3) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
- iree.store_output(%4 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_7[7]() {
- iree.executable[7](Unspecified) {
- module {
- func @main_entry_dispatch_7(%arg0: memref<1x10xf32>, %arg1: memref<1xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<1> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
- %cst = constant dense<5.000000e-01> : tensor<f32>
- %1 = "mhlo.reduce"(%0, %cst) ( {
- ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>): // no predecessors
- %2 = addf %arg2, %arg3 : tensor<f32>
- "mhlo.return"(%2) : (tensor<f32>) -> ()
- }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
- iree.store_output(%1 : tensor<1xf32>, %arg1 : memref<1xf32>)
- iree.return
- }
- }
- }
- }
- iree.multi_arch_executable @main_ex_dispatch_8[8]() {
- iree.executable[8](Unspecified) {
- module {
- func @main_entry_dispatch_8(%arg0: memref<1xf32>, %arg1: memref<1x10xf32>, %arg2: memref<1x10xf32>)
- attributes {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
- %0 = iree.load_input(%arg0 : memref<1xf32>) : tensor<1xf32>
- %1 = iree.load_input(%arg1 : memref<1x10xf32>) : tensor<1x10xf32>
- %2 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
- %3 = divf %1, %2 : tensor<1x10xf32>
- iree.store_output(%3 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
- iree.return
- }
- }
- }
- }
- func @main(%arg0: memref<1x28x28x1xf32>) -> memref<1x10xf32>
- attributes {iree.module.export} {
- %0 = "iree_ll_seq.constant"() {value = dense<5.000000e-01> : tensor<784x128xf32>} : () -> memref<784x128xf32>
- %1 = "iree_ll_seq.constant"() {value = dense<5.000000e-01> : tensor<128x10xf32>} : () -> memref<128x10xf32>
- %2 = "iree_ll_seq.alloc_heap"() : () -> memref<1x784xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_0::main_entry_dispatch_0[dense<[784, 1, 1]> : tensor<3xi32>](%arg0, %2) : (memref<1x28x28x1xf32>, memref<1x784xf32>) -> ()
- %3 = "iree_ll_seq.alloc_heap"() : () -> memref<1x128xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_1::main_entry_dispatch_1[dense<[128, 1, 1]> : tensor<3xi32>](%2, %0, %3) : (memref<1x784xf32>, memref<784x128xf32>, memref<1x128xf32>) -> ()
- %4 = "iree_ll_seq.alloc_heap"() : () -> memref<1x128xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_2::main_entry_dispatch_2[dense<[128, 1, 1]> : tensor<3xi32>](%3, %4) : (memref<1x128xf32>, memref<1x128xf32>) -> ()
- %5 = "iree_ll_seq.alloc_heap"() : () -> memref<1x10xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_3::main_entry_dispatch_3[dense<[10, 1, 1]> : tensor<3xi32>](%4, %1, %5) : (memref<1x128xf32>, memref<128x10xf32>, memref<1x10xf32>) -> ()
- %6 = "iree_ll_seq.alloc_heap"() : () -> memref<1x10xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_4::main_entry_dispatch_4[dense<[10, 1, 1]> : tensor<3xi32>](%5, %6) : (memref<1x10xf32>, memref<1x10xf32>) -> ()
- %7 = "iree_ll_seq.alloc_heap"() : () -> memref<1xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_5::main_entry_dispatch_5[dense<1> : tensor<3xi32>](%6, %7) : (memref<1x10xf32>, memref<1xf32>) -> ()
- %8 = "iree_ll_seq.alloc_heap"() : () -> memref<1x10xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_6::main_entry_dispatch_6[dense<[10, 1, 1]> : tensor<3xi32>](%6, %7, %8) : (memref<1x10xf32>, memref<1xf32>, memref<1x10xf32>) -> ()
- %9 = "iree_ll_seq.alloc_heap"() : () -> memref<1xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_7::main_entry_dispatch_7[dense<1> : tensor<3xi32>](%8, %9) : (memref<1x10xf32>, memref<1xf32>) -> ()
- %10 = "iree_ll_seq.alloc_heap"() : () -> memref<1x10xf32>
- iree_ll_seq.static_dispatch main_ex_dispatch_8::main_entry_dispatch_8[dense<[10, 1, 1]> : tensor<3xi32>](%9, %8, %10) : (memref<1xf32>, memref<1x10xf32>, memref<1x10xf32>) -> ()
- iree_ll_seq.return %10 : memref<1x10xf32>
- }
-}
-```
-
-**NOTE**: this is effectively compiling in -O0, which is why the buffers are not
-aliased and some dispatch region fusing is not performed. As we get things going
-we'll be adding simple optimizations that can operate on this IR to elide almost
-all copies and externalize allocations to transient pooled memory.
-
-## Final IREE Module with SPIR-V
-
-TODO(benvanik): once reductions are done.
diff --git a/docs/using_colab.md b/docs/using_iree/using_colab.md
similarity index 100%
rename from docs/using_colab.md
rename to docs/using_iree/using_colab.md
diff --git a/experimental/ModelBuilder/MemRefUtils.h b/experimental/ModelBuilder/MemRefUtils.h
index ebd6d1b..cb03bde 100644
--- a/experimental/ModelBuilder/MemRefUtils.h
+++ b/experimental/ModelBuilder/MemRefUtils.h
@@ -44,8 +44,8 @@
#include <functional>
#include <memory>
-#include "llvm/ADT/Optional.h"
-#include "mlir/ExecutionEngine/CRunnerUtils.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/Optional.h"
+#include "third_party/llvm/llvm-project/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h"
#ifndef IREE_EXPERIMENTAL_MODELBUILDER_MEMREFUTILS_H_
#define IREE_EXPERIMENTAL_MODELBUILDER_MEMREFUTILS_H_
@@ -74,20 +74,53 @@
// Mallocs a StridedMemRefDescriptor<T, N>* that matches the MLIR ABI.
// This is an implementation detail that is kept in sync with MLIR codegen
+// conventions. Additionally takes a `shapeAlloc` array which
+// is used instead of `shape` to allocate "more aligned" data and compute the
+// corresponding strides.
+template <typename T, int N>
+typename std::enable_if<(N >= 1), StridedMemRefType<T, N> *>::type
+makeStridedMemRefDescriptor(void *ptr, void *alignedPtr,
+ const std::array<int64_t, N> &shape,
+ const std::array<int64_t, N> &shapeAlloc,
+ AllocFunType allocFun = &::malloc) {
+ StridedMemRefType<T, N> *descriptor = static_cast<StridedMemRefType<T, N> *>(
+ allocFun(sizeof(StridedMemRefType<T, N>)));
+ descriptor->basePtr = static_cast<T *>(ptr);
+ descriptor->data = static_cast<T *>(alignedPtr);
+ descriptor->offset = 0;
+ std::copy(shape.begin(), shape.end(), descriptor->sizes);
+ auto strides = makeStrides<N>(shapeAlloc);
+ std::copy(strides.begin(), strides.end(), descriptor->strides);
+ return descriptor;
+}
+
+// Mallocs a StridedMemRefDescriptor<T, N>* that matches the MLIR ABI.
+// This is an implementation detail that is kept in sync with MLIR codegen
// conventions.
template <typename T, int N>
typename std::enable_if<(N >= 1), StridedMemRefType<T, N> *>::type
makeStridedMemRefDescriptor(void *ptr, void *alignedPtr,
const std::array<int64_t, N> &shape,
- AllocFunType alloc = &::malloc) {
- StridedMemRefType<T, N> *descriptor = static_cast<StridedMemRefType<T, N> *>(
- alloc(sizeof(StridedMemRefType<T, N>)));
+ AllocFunType allocFun = &::malloc) {
+ return makeStridedMemRefDescriptor(ptr, alignedPtr, shape, shape, allocFun);
+}
+
+// Mallocs a StridedMemRefDescriptor<T, 0>* that matches the MLIR ABI.
+// This is an implementation detail that is kept in sync with MLIR codegen
+// conventions. Additionally takes a `shapeAlloc` array which
+// is used instead of `shape` to allocate "more aligned" data and compute the
+// corresponding strides.
+template <typename T, int N>
+typename std::enable_if<(N == 0), StridedMemRefType<T, 0> *>::type
+makeStridedMemRefDescriptor(void *ptr, void *alignedPtr,
+ const std::array<int64_t, N> &shape = {},
+ const std::array<int64_t, N> &shapeAlloc = {},
+ AllocFunType allocFun = &::malloc) {
+ StridedMemRefType<T, 0> *descriptor = static_cast<StridedMemRefType<T, 0> *>(
+ allocFun(sizeof(StridedMemRefType<T, 0>)));
descriptor->basePtr = static_cast<T *>(ptr);
descriptor->data = static_cast<T *>(alignedPtr);
descriptor->offset = 0;
- std::copy(shape.begin(), shape.end(), descriptor->sizes);
- auto strides = makeStrides<N>(shape);
- std::copy(strides.begin(), strides.end(), descriptor->strides);
return descriptor;
}
@@ -98,13 +131,8 @@
typename std::enable_if<(N == 0), StridedMemRefType<T, 0> *>::type
makeStridedMemRefDescriptor(void *ptr, void *alignedPtr,
const std::array<int64_t, N> &shape = {},
- AllocFunType alloc = &::malloc) {
- StridedMemRefType<T, 0> *descriptor = static_cast<StridedMemRefType<T, 0> *>(
- alloc(sizeof(StridedMemRefType<T, 0>)));
- descriptor->basePtr = static_cast<T *>(ptr);
- descriptor->data = static_cast<T *>(alignedPtr);
- descriptor->offset = 0;
- return descriptor;
+ AllocFunType allocFun = &::malloc) {
+ return makeStridedMemRefDescriptor(ptr, alignedPtr, shape, shape, allocFun);
}
// Mallocs an UnrankedMemRefType<T>* that contains a ranked
@@ -113,9 +141,9 @@
template <typename T, int N>
::UnrankedMemRefType<T> *allocUnrankedDescriptor(
void *data, void *alignedData, const std::array<int64_t, N> &shape,
- AllocFunType alloc = &::malloc) {
+ AllocFunType allocFun = &::malloc) {
::UnrankedMemRefType<T> *res = static_cast<::UnrankedMemRefType<T> *>(
- alloc(sizeof(::UnrankedMemRefType<T>)));
+ allocFun(sizeof(::UnrankedMemRefType<T>)));
res->rank = N;
res->descriptor = makeStridedMemRefDescriptor<T, N>(data, alignedData, shape);
return res;
@@ -157,14 +185,14 @@
// and greater than the size of T. By default the alignment is sizeof(T).
template <typename T>
std::pair<void *, void *> allocAligned(
- size_t nElements, AllocFunType alloc = &::malloc,
+ size_t nElements, AllocFunType allocFun = &::malloc,
llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>()) {
assert(sizeof(T) < (1ul << 32) && "Elemental type overflows");
auto size = nElements * sizeof(T);
auto desiredAlignment = alignment.getValueOr(pow2msb(sizeof(T)));
assert((desiredAlignment & (desiredAlignment - 1)) == 0);
assert(desiredAlignment >= sizeof(T));
- void *data = alloc(size + desiredAlignment);
+ void *data = allocFun(size + desiredAlignment);
uintptr_t addr = reinterpret_cast<uintptr_t>(data);
uintptr_t rem = addr % desiredAlignment;
void *alignedData =
@@ -175,43 +203,48 @@
}
// Entry point to allocate a dense buffer with a given `shape` and initializer
-// of type PointwiseInitializer. Can optionally take specific `alloc` and `free`
-// functions.
+// of type PointwiseInitializer. Additionally takes a `shapeAlloc` array which
+// is used instead of `shape` to allocate "more aligned" data and compute the
+// corresponding strides.
+// Can optionally take specific alloc and free functions.
+//
+// Example:
+// When called with `shape = [128, 127]` and `shapeAlloc = [128, 128]`, this
+// allocates a memref with `128*128*sizeof(T)` bytes, `sizes = [128, 127]` and
+// `strides = [128, 1]`.
template <typename T, int N, typename FreeFunType = decltype(&::free)>
-std::unique_ptr<::UnrankedMemRefType<float>, FreeFunType>
-makeInitializedUnrankedDescriptor(
- const std::array<int64_t, N> &shape, LinearInitializer<T> init,
+std::unique_ptr<StridedMemRefType<T, N>, FreeFunType>
+makeInitializedStridedMemRefDescriptor(
+ const std::array<int64_t, N> &shape,
+ const std::array<int64_t, N> &shapeAlloc, LinearInitializer<T> init,
llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>(),
- AllocFunType alloc = &::malloc, FreeFunType freeFun = &::free) {
+ AllocFunType allocFun = &::malloc, FreeFunType freeFun = &::free) {
+ for (unsigned i = 0; i < N; ++i)
+ assert(shape[i] <= shapeAlloc[i] &&
+ "shapeAlloc must be greater than or equal to shape");
int64_t nElements = 1;
- for (int64_t s : shape) nElements *= s;
- auto allocated = allocAligned<T>(nElements, alloc, alignment);
+ for (int64_t s : shapeAlloc) nElements *= s;
+ auto allocated = allocAligned<T>(nElements, allocFun, alignment);
auto *data = static_cast<T *>(allocated.first);
auto *alignedData = static_cast<T *>(allocated.second);
for (unsigned i = 0; i < nElements; ++i) init(i, alignedData);
- return std::unique_ptr<::UnrankedMemRefType<float>, FreeFunType>(
- detail::allocUnrankedDescriptor<T, N>(data, alignedData, shape), freeFun);
+ return std::unique_ptr<StridedMemRefType<T, N>, FreeFunType>(
+ detail::makeStridedMemRefDescriptor<T, N>(data, alignedData, shape,
+ shapeAlloc, allocFun),
+ freeFun);
}
// Entry point to allocate a dense buffer with a given `shape` and initializer
-// of type PointwiseInitializer. Can optionally take specific `alloc` and `free`
+// of type PointwiseInitializer. Can optionally take specific alloc and free
// functions.
template <typename T, int N, typename FreeFunType = decltype(&::free)>
std::unique_ptr<StridedMemRefType<T, N>, FreeFunType>
makeInitializedStridedMemRefDescriptor(
const std::array<int64_t, N> &shape, LinearInitializer<T> init,
llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>(),
- AllocFunType alloc = &::malloc, FreeFunType freeFun = &::free) {
- int64_t nElements = 1;
- for (int64_t s : shape) nElements *= s;
- auto allocated = allocAligned<T>(nElements, alloc, alignment);
- auto *data = static_cast<T *>(allocated.first);
- auto *alignedData = static_cast<T *>(allocated.second);
- for (unsigned i = 0; i < nElements; ++i) init(i, alignedData);
- return std::unique_ptr<StridedMemRefType<T, N>, FreeFunType>(
- detail::makeStridedMemRefDescriptor<T, N>(data, alignedData, shape,
- alloc),
- freeFun);
+ AllocFunType allocFun = &::malloc, FreeFunType freeFun = &::free) {
+ return makeInitializedStridedMemRefDescriptor<T, N>(
+ shape, shape, init, alignment, allocFun, freeFun);
}
} // namespace mlir
diff --git a/experimental/ModelBuilder/ModelRunner.h b/experimental/ModelBuilder/ModelRunner.h
index 3539dbe..8afb37a 100644
--- a/experimental/ModelBuilder/ModelRunner.h
+++ b/experimental/ModelBuilder/ModelRunner.h
@@ -116,7 +116,10 @@
}
// Direct invocation based on MemRefType which automatically packs the data.
template <typename... Args>
- llvm::Error invoke(StringRef funcName, Args &... args) {
+ // TODO(suderman): Re-enable clang-format when new version migrates.
+ // clang-format off
+ llvm::Error invoke(StringRef funcName, Args &...args) {
+ // clang-format on
const std::string adapterName =
std::string("_mlir_ciface_") + funcName.str();
void *argsArray[] = {getData(args)...};
diff --git a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_test_utils.py b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_test_utils.py
index 9cc1c93..f2c6151 100644
--- a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_test_utils.py
+++ b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_test_utils.py
@@ -264,8 +264,7 @@
return self.multi()
reinitialized_modules = [
- tf_utils.CompiledModule.from_existing(module)
- for module in compiled_backends.values()
+ module.create_reinitialized() for module in compiled_backends.values()
]
return VirtualBackendsClass(*reinitialized_modules)
@@ -366,10 +365,12 @@
try:
backends = get_backends()
cls._compiled_backends_dict = {}
- for backend in backends:
- compiled_backend = tf_utils.CompiledModule.compile(
- cls._module_class, backend, cls._exported_names, global_debug_dir)
- cls._compiled_backends_dict[backend.name] = compiled_backend
+ for backend_info in backends:
+ compiled_backend = backend_info.CompiledModule(cls._module_class,
+ backend_info,
+ cls._exported_names,
+ global_debug_dir)
+ cls._compiled_backends_dict[backend_info.name] = compiled_backend
finally:
# Disable crash reproducer (to avoid inadvertently overwriting this
# path on a subsequent interaction).
diff --git a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils.py b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils.py
index 4ef66d1..46a3785 100644
--- a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils.py
+++ b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils.py
@@ -39,22 +39,32 @@
np.random.seed(seed)
+def backends_to_str(target_backends):
+ """Creates a flattened and normalized string representing target_backends."""
+ normalized_backends = []
+ for backend in target_backends:
+ # Remove unusual characters and ensure names don't end or start in "_".
+ backend = re.sub("[^0-9a-zA-Z_]+", "_", backend)
+ normalized_backends.append(backend.strip("_"))
+ return "__".join(normalized_backends)
+
+
def compile_tf_module(tf_module,
target_backends=(),
exported_names=(),
artifacts_dir=None):
"""Compiles a TensorFlow tf.Module and optionally saves compilation artifacts.
- The artifact this creates is not callable. See IreeCompiledModule.compile(...)
- for an API that returns a module that can be called without any further steps.
+ The artifact this creates is not callable. See IreeCompiledModule for an API
+ that returns a module that can be called without any further steps.
If artifacts_dir is provided then the following artifacts will be saved:
saved_model:
A TF SavedModel directory containing the files used translate the
tf.Module into an IREE module.
- tf_input__backends.mlir:
+ tf_input.mlir:
MLIR for the module in TF's input dialect.
- iree_input__backends.mlir:
+ iree_input.mlir:
The MLIR above translated to IREE via compiler.TF_IMPORT_PASS_PIPELINE.
compiled__backends.vmfb:
A VM FlatBuffer compiled to the target backends from the IREE MLIR above.
@@ -77,14 +87,6 @@
# We break up the compilation here so we can save intermediary artifacts.
compiler_context = compiler.Context()
- if artifacts_dir is not None:
- normalized_backends = []
- for backend in target_backends:
- # Remove unusual characters and ensure names don't end or start in "_".
- backend = re.sub("[^0-9a-zA-Z_]+", "_", backend)
- normalized_backends.append(backend.strip("_"))
- backends_string = "__".join(normalized_backends)
-
# Convert the tf_module into raw TF input MLIR.
compiler_module = compiler.tf_load_saved_model(
sm_path,
@@ -93,8 +95,7 @@
pass_pipeline=())
if artifacts_dir is not None:
- tf_mlir_path = os.path.join(artifacts_dir,
- f"tf_input__{backends_string}.mlir")
+ tf_mlir_path = os.path.join(artifacts_dir, "tf_input.mlir")
logging.info("Saving raw TF input MLIR to: %s", tf_mlir_path)
with open(tf_mlir_path, "w") as f:
f.write(compiler_module.to_asm())
@@ -103,16 +104,15 @@
compiler_module.run_pass_pipeline(compiler.TF_IMPORT_PASS_PIPELINE)
if artifacts_dir is not None:
- iree_mlir_path = os.path.join(artifacts_dir,
- f"iree_input__{backends_string}.mlir")
+ iree_mlir_path = os.path.join(artifacts_dir, "iree_input.mlir")
logging.info("Saving IREE input MLIR to: %s", iree_mlir_path)
with open(iree_mlir_path, "w") as f:
f.write(compiler_module.to_asm())
compiled_module = compiler_module.compile(target_backends=target_backends)
if artifacts_dir is not None:
- compiled_path = os.path.join(artifacts_dir,
- f"compiled__{backends_string}.vmfb")
+ compiled_name = f"compiled__{backends_to_str(target_backends)}.vmfb"
+ compiled_path = os.path.join(artifacts_dir, compiled_name)
logging.info("Saving compiled IREE module to: %s", compiled_path)
with open(compiled_path, "wb") as f:
f.write(compiled_module)
@@ -133,51 +133,29 @@
class CompiledModule(object):
- """Base class for the TF and IREE compiled module facades."""
-
- @staticmethod
- def compile(module_class,
- backend_info,
- exported_names=(),
- artifacts_dir=None):
- """Compile a tf.Module using the CompiledModule subclass in backend_info.
-
- Args:
- module_class: the tf.Module subclass to compile.
- backend_info: an element of BackendInfo corresponding to the backend to
- compile to. If a TF 'backend' is provided then the module is wrapped in
- a TfCompiledModule.
- exported_names: an optional iterable of strings representing which of the
- module_class's functions to compile. If exported_names is empty all
- functions will be compiled.
- artifacts_dir: an optional path to save compilation artifacts to.
- """
- compile = backend_info.CompiledModule.compile
- return compile(module_class, backend_info, exported_names, artifacts_dir)
-
- @staticmethod
- def from_existing(module):
- """Duplicates 'module' with the tf.Module's state without recompiling."""
- # Use the backend_info attr to determine which subclass' constructor to use.
- from_existing = module._backend_info.CompiledModule.from_existing
- return from_existing(module)
+ """Base class for the TF and IREE compiled modules."""
def __init__(self, module_class, backend_info, exported_names, artifacts_dir):
- """Default constructor – use `compile` or `from_existing` instead."""
+ """Shared base constructor – not useful on its own."""
self._module_class = module_class
self._backend_info = backend_info
self._exported_names = exported_names
self._artifacts_dir = artifacts_dir
+ def create_reinitialized(self):
+ """Duplicates this module with its initial state without recompiling."""
+ raise NotImplementedError()
+
class IreeCompiledModule(CompiledModule):
"""Iree compiled module."""
- @staticmethod
- def compile(module_class,
- backend_info,
- exported_names=(),
- artifacts_dir=None):
+ def __init__(self,
+ module_class,
+ backend_info,
+ exported_names=[],
+ artifacts_dir=None,
+ _create_reinitialized_args=None):
"""Compile a tf.Module to the target backend in backend_info.
Args:
@@ -189,30 +167,9 @@
functions will be compiled.
artifacts_dir: an optional path to save compilation artifacts to.
"""
- return IreeCompiledModule(module_class, backend_info, exported_names,
- artifacts_dir)
-
- @staticmethod
- def from_existing(module):
- """Duplicates 'module' with the tf.Module's state without recompiling."""
- default_args = [
- module._module_class, module._backend_info, module._exported_names,
- module._artifacts_dir
- ]
- from_existing_args = [module._module_blob, module._module, module._config]
- return IreeCompiledModule(*default_args, from_existing_args)
-
- def __init__(self,
- module_class,
- backend_info,
- exported_names,
- artifacts_dir,
- _from_existing_args=None):
- """Default constructor – use `compile` or `from_existing` instead."""
super().__init__(module_class, backend_info, exported_names, artifacts_dir)
- if _from_existing_args is None:
- # Called from IreeCompiledModule.compile(...)
+ if _create_reinitialized_args is None:
self._module_blob = compile_tf_module(
tf_module=module_class(),
target_backends=backend_info.iree_compiler_targets,
@@ -221,13 +178,22 @@
self._module = rt.VmModule.from_flatbuffer(self._module_blob)
self._config = rt.Config(driver_name=backend_info.iree_driver)
else:
- # Called from IreeCompiledModule.from_existing(module)
- self._module_blob, self._module, self._config = _from_existing_args
+ # Called from self.create_reinitialized()
+ self._module_blob, self._module, self._config = _create_reinitialized_args
# Holds all of the module's mutable state.
self._context = rt.SystemContext(
modules=[self._module], config=self._config)
+ def create_reinitialized(self):
+ """Duplicates this module with its initial state without recompiling."""
+ default_args = [
+ self._module_class, self._backend_info, self._exported_names,
+ self._artifacts_dir
+ ]
+ create_reinitialized_args = [self._module_blob, self._module, self._config]
+ return IreeCompiledModule(*default_args, create_reinitialized_args)
+
def __getattr__(self, attr):
# Try to resolve it as a function.
m = self._context.modules[self._module.name]
@@ -253,11 +219,11 @@
normalize TensorFlow's output to Numpy.
"""
- @staticmethod
- def compile(module_class,
- backend_info,
- exported_names=(),
- artifacts_dir=None):
+ def __init__(self,
+ module_class,
+ backend_info,
+ exported_names=[],
+ artifacts_dir=None):
"""Wrap a tf.Module in a TFCompiledModule facade.
Args:
@@ -269,23 +235,14 @@
artifacts_dir: an optional path to save compilation artifacts to. Has no
effect for this subclass as nothing is compiled.
"""
- return TfCompiledModule(module_class, backend_info, exported_names,
- artifacts_dir)
-
- @staticmethod
- def from_existing(module):
- """Duplicates 'module's facade with the starting state of module_class."""
- duplicate_module = TfCompiledModule(module._module_class,
- module._backend_info,
- module._exported_names,
- module._artifacts_dir)
- return duplicate_module
-
- def __init__(self, module_class, backend_info, exported_names, artifacts_dir):
- """Default constructor – use `compile` or `from_existing` instead."""
super().__init__(module_class, backend_info, exported_names, artifacts_dir)
self._tf_module = module_class()
+ def create_reinitialized(self):
+ """Duplicates this module with the starting state of module_class."""
+ return TfCompiledModule(self._module_class, self._backend_info,
+ self._exported_names, self._artifacts_dir)
+
def __getattr__(self, attr):
# Try to resolve it as a function.
exported = len(self._exported_names) == 0 or attr in self._exported_names
diff --git a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils_test.py b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils_test.py
index dcc3aec..b1d9adb 100644
--- a/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils_test.py
+++ b/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils_test.py
@@ -17,6 +17,7 @@
import os
import tempfile
+from absl import logging
from absl.testing import parameterized
from pyiree.tf.support import tf_utils
import tensorflow as tf
@@ -52,7 +53,7 @@
},
{
'testcase_name': 'multiple_backends',
- 'target_backends': ['vmla', 'llvm'],
+ 'target_backends': ['vmla', 'llvm-ir'],
},
])
def test_artifact_saving(self, target_backends):
@@ -65,12 +66,14 @@
artifacts_to_check = [
'saved_model',
- f'tf_input__{"__".join(target_backends)}.mlir',
- f'iree_input__{"__".join(target_backends)}.mlir',
- f'compiled__{"__".join(target_backends)}.vmfb',
+ 'tf_input.mlir',
+ 'iree_input.mlir',
+ f'compiled__{tf_utils.backends_to_str(target_backends)}.vmfb',
]
for artifact in artifacts_to_check:
- self.assertTrue(os.path.exists(os.path.join(artifacts_dir, artifact)))
+ artifact_path = os.path.join(artifacts_dir, artifact)
+ logging.info('Checking path: %s', artifact_path)
+ self.assertTrue(os.path.exists(artifact_path))
@parameterized.named_parameters([
{
@@ -83,15 +86,15 @@
},
])
def test_unaltered_state(self, backend_name):
- info = tf_utils.BackendInfo.ALL[backend_name]
- module = tf_utils.CompiledModule.compile(StatefulCountingModule, info)
+ backend_info = tf_utils.BackendInfo.ALL[backend_name]
+ module = backend_info.CompiledModule(StatefulCountingModule, backend_info)
# Test that incrementing works properly.
self.assertEqual([0.], module.get_count())
module.increment()
self.assertEqual([1.], module.get_count())
- reinitialized_module = tf_utils.CompiledModule.from_existing(module)
+ reinitialized_module = module.create_reinitialized()
# Test reinitialization.
self.assertEqual([0.], reinitialized_module.get_count())
# Test independent state.
diff --git a/integrations/tensorflow/compiler/BUILD b/integrations/tensorflow/compiler/BUILD
index 080ea20..722e04b 100644
--- a/integrations/tensorflow/compiler/BUILD
+++ b/integrations/tensorflow/compiler/BUILD
@@ -62,6 +62,7 @@
"//iree/tools:iree_opt_main",
"@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration",
"@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tensorflow_passes",
+ "@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tf_saved_model_passes",
"@org_tensorflow//tensorflow/compiler/mlir/xla:xla_legalize_tf",
],
)
diff --git a/integrations/tensorflow/e2e/BUILD b/integrations/tensorflow/e2e/BUILD
index 2fd4545..d63b839 100644
--- a/integrations/tensorflow/e2e/BUILD
+++ b/integrations/tensorflow/e2e/BUILD
@@ -93,6 +93,7 @@
"matrix_ops_test.py",
"ring_buffer_test.py", # TODO(b/148747011)
"scatter_update_test.py",
+ "sliding_window_test.py", # TODO(#2659)
"strings_test.py",
]
diff --git a/integrations/tensorflow/e2e/README.md b/integrations/tensorflow/e2e/README.md
index 341aa56..04604d8 100644
--- a/integrations/tensorflow/e2e/README.md
+++ b/integrations/tensorflow/e2e/README.md
@@ -26,10 +26,10 @@
## Compiling `tf.Module`s
Compatible TensorFlow modules can be compiled to specific IREE backends using
-`IreeCompiledModule.compile(...)`. This also optionally saves compilation
-artifacts to a specified directory. These artifacts include: MLIR across various
-lowerings, a TensorFlow SavedModel, and the compiled VM FlatBuffer. A basic
-example of creating and calling an `IreeCompiledModule` can be found in
+`IreeCompiledModule`. This also optionally saves compilation artifacts to a
+specified directory. These artifacts include: MLIR across various lowerings, a
+TensorFlow SavedModel, and the compiled VM FlatBuffer. A basic example of
+creating and calling an `IreeCompiledModule` can be found in
[`tf_utils_test.py`](https://github.com/google/iree/blob/main/integrations/tensorflow/bindings/python/pyiree/tf/support/tf_utils_test.py)
When using Keras models or tf.Modules with functions that IREE can't compile,
@@ -38,7 +38,7 @@
```python
from pyiree.tf.support import tf_utils
vmla_module = tf_utils.IreeCompiledModule(
- constructor=KerasTFModuleClass,
+ module_class=KerasTFModuleClass,
backend_info=tf_utils.BackendInfo.ALL['iree_vmla'],
exported_names=['predict'])
vmla_module.predict(...)
diff --git a/integrations/tensorflow/e2e/keras/vision_model_test.py b/integrations/tensorflow/e2e/keras/vision_model_test.py
index 4f3917e..54374ff 100644
--- a/integrations/tensorflow/e2e/keras/vision_model_test.py
+++ b/integrations/tensorflow/e2e/keras/vision_model_test.py
@@ -152,5 +152,6 @@
tf.test.main()
+
if __name__ == '__main__':
app.run(main)
diff --git a/iree/base/BUILD b/iree/base/BUILD
index 052bd1d..8d6b446 100644
--- a/iree/base/BUILD
+++ b/iree/base/BUILD
@@ -435,6 +435,15 @@
],
)
+cc_test(
+ name = "time_test",
+ srcs = ["time_test.cc"],
+ deps = [
+ ":time",
+ "//iree/testing:gtest_main",
+ ],
+)
+
cc_library(
name = "tracing",
hdrs = ["tracing.h"],
diff --git a/iree/base/CMakeLists.txt b/iree/base/CMakeLists.txt
index a6dccb2..322bf43 100644
--- a/iree/base/CMakeLists.txt
+++ b/iree/base/CMakeLists.txt
@@ -524,6 +524,16 @@
PUBLIC
)
+iree_cc_test(
+ NAME
+ time_test
+ SRCS
+ "time_test.cc"
+ DEPS
+ ::time
+ iree::testing::gtest_main
+)
+
if(${IREE_ENABLE_RUNTIME_TRACING})
iree_cc_library(
NAME
diff --git a/iree/base/api.h b/iree/base/api.h
index e1b5bdc..ba8ec9b 100644
--- a/iree/base/api.h
+++ b/iree/base/api.h
@@ -374,7 +374,7 @@
// Like absl::Duration, represented as relative nanoseconds.
typedef int64_t iree_duration_t;
// Like absl::InfiniteDuration.
-#define IREE_DURATION_INFINITE INT64_MIN
+#define IREE_DURATION_INFINITE INT64_MAX
// Like absl::ZeroDuration.
#define IREE_DURATION_ZERO 0
diff --git a/iree/base/signature_mangle.h b/iree/base/signature_mangle.h
index 948dbb5..8afb4b5 100644
--- a/iree/base/signature_mangle.h
+++ b/iree/base/signature_mangle.h
@@ -156,7 +156,7 @@
// -----------------------------------------------------------------------------
// Mangles raw function signatures.
-// See function_abi.md.
+// See docs/design_docs/function_abi.md.
class RawSignatureMangler {
public:
static SignatureBuilder ToFunctionSignature(const SignatureBuilder& inputs,
@@ -364,7 +364,8 @@
// Mangles function signatures according to the Sip (Structured Index Path) V1
// scheme.
//
-// Mangler for the 'sip' ABI. See function_abi.md in the documentation.
+// Mangler for the 'sip' ABI. See docs/design_docs/function_abi.md in the
+// documentation.
class SipSignatureMangler {
public:
enum class IndexMode {
@@ -443,7 +444,8 @@
// Parser for signatures generated by SipSignatureMangler.
// This uses a Visitor interface to walk either input or result structs.
//
-// Mangler for the 'sip' ABI. See function_abi.md in the documentation.
+// Mangler for the 'sip' ABI. See docs/design_docs/function_abi.md in the
+// documentation.
class SipSignatureParser {
public:
enum class StructType {
diff --git a/iree/base/time.h b/iree/base/time.h
index e88b3b8..e51a5b1 100644
--- a/iree/base/time.h
+++ b/iree/base/time.h
@@ -15,6 +15,9 @@
#ifndef IREE_BASE_TIME_H_
#define IREE_BASE_TIME_H_
+#include <type_traits>
+#include <utility>
+
#include "iree/base/api.h"
namespace iree {
@@ -43,7 +46,7 @@
return !(lhs == rhs);
}
friend inline bool operator<(const ChronoType& lhs, const ChronoType& rhs) {
- return rhs.value_ < lhs.value_;
+ return lhs.value_ < rhs.value_;
}
friend inline bool operator>(const ChronoType& lhs, const ChronoType& rhs) {
return rhs < lhs;
diff --git a/iree/base/time_test.cc b/iree/base/time_test.cc
new file mode 100644
index 0000000..114cd4e
--- /dev/null
+++ b/iree/base/time_test.cc
@@ -0,0 +1,45 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/time.h"
+
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace {
+
+TEST(Time, DurationComparisons) {
+ EXPECT_TRUE(Milliseconds(123) == Milliseconds(123));
+ EXPECT_FALSE(Milliseconds(123) == Milliseconds(456));
+ EXPECT_FALSE(Milliseconds(123) != Milliseconds(123));
+ EXPECT_TRUE(Milliseconds(123) != Milliseconds(456));
+
+ EXPECT_TRUE(Milliseconds(123) < Milliseconds(456));
+ EXPECT_FALSE(Milliseconds(123) > Milliseconds(456));
+ EXPECT_FALSE(Milliseconds(123) > Milliseconds(123));
+ EXPECT_FALSE(Milliseconds(123) < Milliseconds(123));
+
+ EXPECT_TRUE(Milliseconds(123) <= Milliseconds(123));
+ EXPECT_TRUE(Milliseconds(123) >= Milliseconds(123));
+ EXPECT_TRUE(Milliseconds(123) <= Milliseconds(456));
+ EXPECT_FALSE(Milliseconds(123) >= Milliseconds(456));
+}
+
+TEST(Time, DurationArithmetic) {
+ EXPECT_EQ(Milliseconds(150), Milliseconds(100) + Milliseconds(50));
+ EXPECT_EQ(Milliseconds(50), Milliseconds(100) - Milliseconds(50));
+}
+
+} // namespace
+} // namespace iree
diff --git a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
index 99f7eb1..90178fc 100644
--- a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
+++ b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
@@ -27,6 +27,7 @@
#include "iree/compiler/Dialect/IREE/IR/IREEOps.h"
#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
@@ -444,7 +445,7 @@
rewriter.getI64IntegerAttr(1), // args_out
rewriter.getArrayAttr(indexingMaps),
getParallelAndReductionIterAttrs(rewriter, nloops, nonParallelLoops),
- /*doc=*/nullptr, /*library_call=*/nullptr);
+ /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
// Add a block to the region.
auto *region = &linalgOp.region();
@@ -523,11 +524,6 @@
LogicalResult PadOpConversion::apply(
mhlo::PadOp op, ArrayRef<Value> inputBuffers, ArrayRef<Value> resultBuffers,
ConversionPatternRewriter &rewriter) const {
- if (llvm::any_of(op.interior_padding().getValues<IntegerAttr>(),
- [](auto attr) { return attr.getInt() != 0; }))
- return op.emitError(
- "pad op with non-zero interiror_padding is not supported");
-
mhlo::PadOp::Adaptor adaptor(inputBuffers);
auto loc = op.getLoc();
@@ -535,84 +531,31 @@
Value paddingVal =
paddingConstVal
? rewriter.create<ConstantOp>(loc, paddingConstVal).getResult()
- : adaptor.padding_value();
+ : rewriter.create<LoadOp>(loc, adaptor.padding_value());
- auto operandType = adaptor.operand().getType().cast<ShapedType>();
- int rank = operandType.getRank();
-
- SmallVector<Attribute, 2> indexingMaps;
- indexingMaps.emplace_back(getPadOpInputIndexingMap(op, rank, rewriter));
- if (!paddingConstVal) {
- indexingMaps.emplace_back(AffineMapAttr::get(
- AffineMap::get(rank, /*symbolCount=*/0, rewriter.getContext())));
- }
- indexingMaps.emplace_back(AffineMapAttr::get(
- AffineMap::getMultiDimIdentityMap(rank, rewriter.getContext())));
-
- SmallVector<Type, 2> resultTypes = {};
- SmallVector<Value, 2> linalgOpArgs = {adaptor.operand()};
- if (!paddingConstVal) linalgOpArgs.push_back(adaptor.padding_value());
- linalgOpArgs.push_back(resultBuffers[0]);
- auto linalgOp = rewriter.create<linalg::IndexedGenericOp>(
- loc, resultTypes, linalgOpArgs,
- rewriter.getI64IntegerAttr(linalgOpArgs.size() - 1), // args_in
- rewriter.getI64IntegerAttr(1), // args_out
- rewriter.getArrayAttr(indexingMaps),
- getParallelAndReductionIterAttrs(rewriter, rank, /*nReduction=*/0),
- /*doc=*/nullptr, /*library_call=*/nullptr);
-
- // Add a block to the region.
- auto *region = &linalgOp.region();
- auto *block = rewriter.createBlock(region, region->end());
- SmallVector<Type, 4> bodyArgTypes;
- bodyArgTypes.append(rank, rewriter.getIndexType());
- bodyArgTypes.append(linalgOpArgs.size(), operandType.getElementType());
- block->addArguments(bodyArgTypes);
- rewriter.setInsertionPointToEnd(block);
-
- // If the `index` of the result at a particular dimension i, is d_i, check if
- //
- // (d_i >= edge_padding_low[i]) &&
- // (d_i < (edge_padding_low[i] + operand_shape[i])).
- //
- // If true, then use the value of the operand, otherwise use the padding
- // value.
const auto &edgePaddingLow = op.edge_padding_low();
- const auto &edgePaddingHigh = op.edge_padding_high();
-
- Type indexType = rewriter.getIndexType();
- Value cond = nullptr;
- auto applyAndOp = [&](Value val) {
- cond = cond ? rewriter.create<AndOp>(loc, cond, val) : val;
- };
- for (int i = 0; i < rank; ++i) {
- Value dim = block->getArgument(i);
- int64_t paddingLow = edgePaddingLow.getValue<IntegerAttr>(i).getInt();
- int64_t paddingHigh = edgePaddingHigh.getValue<IntegerAttr>(i).getInt();
- auto low = rewriter.create<ConstantOp>(
- loc, indexType, rewriter.getIntegerAttr(indexType, paddingLow));
-
- // d_i < (edge_padding_low[i] + operand_shape[i])
- if (paddingLow != 0 && paddingHigh != 0) {
- auto operandExtent = rewriter.create<DimOp>(loc, adaptor.operand(), i);
- auto bound = rewriter.create<AddIOp>(loc, low, operandExtent);
- auto checkUb =
- rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, dim, bound);
- applyAndOp(checkUb);
- }
-
- if (paddingLow != 0) {
- // d_i >= edge_padding_low[i]
- auto checkLb = rewriter.create<CmpIOp>(loc, CmpIPredicate::sge, dim, low);
- applyAndOp(checkLb);
- }
+ const auto &interiorPadding = op.interior_padding();
+ SmallVector<Value, 3> offsets, sizes, strides;
+ for (auto it : llvm::enumerate(llvm::zip(edgePaddingLow, interiorPadding))) {
+ Value startIndex = rewriter.create<ConstantIndexOp>(
+ loc, std::get<0>(it.value()).getZExtValue());
+ offsets.push_back(startIndex);
+ Value size = rewriter.create<DimOp>(loc, resultBuffers[0], it.index());
+ sizes.push_back(size);
+ Value stride = rewriter.create<ConstantIndexOp>(
+ loc, std::get<1>(it.value()).getZExtValue() + 1);
+ strides.push_back(stride);
}
- Value inputVal = block->getArgument(rank);
- if (!paddingConstVal) paddingVal = block->getArgument(rank + 1);
- Value result =
- cond ? rewriter.create<SelectOp>(loc, cond, inputVal, paddingVal)
- : inputVal;
- rewriter.create<linalg::YieldOp>(loc, result);
+
+ // TODO(hanchung): Move SubViewOp this down to before where it is used.
+ // The pass for splitting dispatch function for vulkan requires no other ops
+ // interleave with Linalg structured ops, so put the SubViewOp in the
+ // beginning.
+ auto subViewOp = rewriter.create<SubViewOp>(loc, resultBuffers[0], offsets,
+ sizes, strides);
+ rewriter.create<linalg::FillOp>(loc, resultBuffers[0], paddingVal);
+ rewriter.create<linalg::CopyOp>(loc, inputBuffers[0], subViewOp);
+
return success();
}
@@ -691,8 +634,9 @@
int batch = op.batch_dims().getSExtValue();
auto indexShapeType = adaptor.index().getType().dyn_cast<ShapedType>();
int nIndices = indexShapeType.getRank();
- if (batch < 0)
- return op.emitError("expected batch_dims is greater than or equal to zero");
+ auto inputShapeType = adaptor.input().getType().dyn_cast<ShapedType>();
+ if (axis < 0) axis += inputShapeType.getRank();
+ if (batch < 0) batch += nIndices;
Location loc = op.getLoc();
Value output = op.getResult();
@@ -715,7 +659,7 @@
rewriter.getI64IntegerAttr(1), // args_out
rewriter.getArrayAttr(indexingMaps),
getParallelAndReductionIterAttrs(rewriter, rank, /*nReduction=*/0),
- /*doc=*/nullptr, /*library_call=*/nullptr);
+ /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
// Add a block to the region.
auto *region = &linalgOp.region();
@@ -1019,7 +963,7 @@
rewriter.getArrayAttr(indexingMaps),
getParallelAndReductionIterAttrs(rewriter, nInputRank,
reductionDims.size()),
- /*doc=*/nullptr, /*library_call=*/nullptr);
+ /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
linalgOp.region().takeBody(reduceOp.body());
{
@@ -1095,7 +1039,8 @@
op.getLoc(), ArrayRef<Type>(), opArgs, op.args_in(), op.args_out(),
op.indexing_maps(), op.iterator_types(),
/*doc=*/nullptr,
- /*library_call=*/nullptr);
+ /*library_call=*/nullptr,
+ /*symbol_source=*/nullptr);
// Move the region from the replaced op into the new op.
unsigned numTensorOperands = op.getNumOperands();
// indexed_generic op has arguments for each index. In the case of generic
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir b/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir
index a167ef4..8d1d96d 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir
@@ -5,7 +5,8 @@
func @pad_cst() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
- // CHECK: linalg.indexed_generic
+ // CHECK: linalg.fill
+ // CHECK: linalg.copy
%1 = constant dense<0.0> : tensor<f32>
%2 = "mhlo.pad"(%0, %1) {
edge_padding_high = dense<[2, 3]> : tensor<2xi64>,
@@ -29,7 +30,8 @@
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
%1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
- // CHECK: linalg.indexed_generic
+ // CHECK: linalg.fill
+ // CHECK: linalg.copy
%2 = "mhlo.pad"(%0, %1) {
edge_padding_high = dense<[2, 3]> : tensor<2xi64>,
edge_padding_low = dense<[4, 5]> : tensor<2xi64>,
@@ -52,7 +54,8 @@
func @pad_no_op() {
%c0 = constant 0 : index
%0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
- // CHECK: linalg.indexed_generic
+ // CHECK: linalg.fill
+ // CHECK: linalg.copy
%1 = constant dense<0.0> : tensor<f32>
%2 = "mhlo.pad"(%0, %1) {
edge_padding_high = dense<0> : tensor<2xi64>,
diff --git a/iree/compiler/Conversion/LinalgToLLVM/BUILD b/iree/compiler/Conversion/LinalgToLLVM/BUILD
index 41d9988..c6cafc1 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/BUILD
+++ b/iree/compiler/Conversion/LinalgToLLVM/BUILD
@@ -22,13 +22,14 @@
name = "LinalgToLLVM",
srcs = [
"ConvertToLLVM.cpp",
- "HALInterfaceToMemrefArguments.cpp",
+ "MatMulVectorization.cpp",
"Passes.cpp",
],
hdrs = [
"Passes.h",
],
deps = [
+ "//iree/compiler/Conversion/CodegenUtils",
"//iree/compiler/Conversion/HLOToLinalg",
"//iree/compiler/Dialect/HAL/IR",
"//iree/compiler/Dialect/HAL/IR:HALDialect",
@@ -46,6 +47,7 @@
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:StandardOpsTransforms",
"@llvm-project//mlir:Transforms",
+ "@llvm-project//mlir:VectorOps",
"@llvm-project//mlir:VectorToLLVM",
"@llvm-project//mlir:VectorToSCF",
],
diff --git a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
index d21dc19..fddc144 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -21,7 +21,7 @@
"Passes.h"
SRCS
"ConvertToLLVM.cpp"
- "HALInterfaceToMemrefArguments.cpp"
+ "MatMulVectorization.cpp"
"Passes.cpp"
DEPS
MLIRAffineToStandard
@@ -35,8 +35,10 @@
MLIRStandardOpsTransforms
MLIRStandardToLLVM
MLIRTransforms
+ MLIRVector
MLIRVectorToLLVM
MLIRVectorToSCF
+ iree::compiler::Conversion::CodegenUtils
iree::compiler::Conversion::HLOToLinalg
iree::compiler::Dialect::HAL::IR
iree::compiler::Dialect::HAL::IR::HALDialect
diff --git a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
index 193eb3c..777fb02 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
@@ -17,6 +17,7 @@
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "iree/compiler/Dialect/IREE/IR/IREEOps.h"
#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h"
+#include "iree/compiler/Dialect/Shape/IR/ShapeTypes.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
@@ -27,6 +28,7 @@
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {
@@ -124,6 +126,180 @@
}
};
+/// Returns true if `aOp` has a desciptor (set, binding) pair smaller than
+/// `bOp`. Note that this ignores the offset.
+bool operator<(IREE::HAL::InterfaceBindingOp aOp,
+ IREE::HAL::InterfaceBindingOp bOp) {
+ if (aOp.set().getZExtValue() == bOp.set().getZExtValue())
+ return aOp.binding().getZExtValue() < bOp.binding().getZExtValue();
+ return aOp.set().getZExtValue() < bOp.set().getZExtValue();
+}
+
+// Change signature of entry function to func
+// entry_func(%packed_buffers_arg_ptr:
+// !<llvm.int8**>, %push_constant: !<llvm.int64*>) and lower IREE and HAL ops to
+// corresponding LLVMIR ops to construct memref descriptors and load
+// push_constant values.
+class ConvertFuncWithHALInterface : public ConvertToLLVMPattern {
+ public:
+ explicit ConvertFuncWithHALInterface(MLIRContext *context,
+ LLVMTypeConverter &typeconverter)
+ : ConvertToLLVMPattern(FuncOp::getOperationName(), context,
+ typeconverter) {}
+
+ LogicalResult matchAndRewrite(
+ Operation *op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const override {
+ if (SymbolTable::getSymbolVisibility(op) != SymbolTable::Visibility::Public)
+ return failure();
+ auto funcOp = dyn_cast_or_null<FuncOp>(op);
+ FunctionType fnType = funcOp.getType();
+ if (fnType.getNumInputs() != 0) {
+ return rewriter.notifyMatchFailure(
+ funcOp, "entry function should not have inputs");
+ }
+
+ // Get interface buffers from all the blocks.
+ SmallVector<IREE::PlaceholderOp, 8> bufferOps;
+ SmallVector<IREE::HAL::InterfaceLoadConstantOp, 8> loadOps;
+ for (Block &block : funcOp.getBlocks()) {
+ for (Operation &op : block) {
+ if (auto phOp = dyn_cast<IREE::PlaceholderOp>(op))
+ bufferOps.push_back(phOp);
+ if (auto phOp = dyn_cast<IREE::HAL::InterfaceLoadConstantOp>(op)) {
+ loadOps.push_back(phOp);
+ }
+ }
+ }
+
+ if (bufferOps.empty()) return failure();
+
+ // A map from buffer ops to their corresponding interface binding ops.
+ llvm::DenseMap<Operation *, IREE::HAL::InterfaceBindingOp> bufferBindingMap;
+ for (auto bufferOp : bufferOps) {
+ auto symbol = SymbolTable::lookupNearestSymbolFrom(
+ bufferOp, bufferOp.getAttrOfType<SymbolRefAttr>("binding"));
+ bufferBindingMap[bufferOp] = cast<IREE::HAL::InterfaceBindingOp>(symbol);
+ }
+
+ // Sort buffers according to their descriptor (set, binding) pair.
+ llvm::sort(bufferOps, [&bufferBindingMap](IREE::PlaceholderOp aBuffer,
+ IREE::PlaceholderOp bBuffer) {
+ return bufferBindingMap[aBuffer] < bufferBindingMap[bBuffer];
+ });
+
+ // A map from buffer ops to their corresponding function argument indices.
+ llvm::DenseMap<Operation *, unsigned> bufferArgMap;
+ // A map from binding ops to their corresponding function argument indices.
+ llvm::DenseMap<Operation *, unsigned> bindingArgMap;
+ llvm::SmallVector<MemRefType, 4> inputMemRefTypes;
+ llvm::SmallVector<LLVM::LLVMType, 4> inputStructPtrs;
+ unsigned argIndex = 0;
+ for (auto bufferOp : bufferOps) {
+ auto binding = bufferBindingMap[bufferOp];
+ auto it = bindingArgMap.find(binding);
+ if (it != bindingArgMap.end()) {
+ bufferArgMap[bufferOp] = it->second;
+ } else {
+ bindingArgMap[binding] = argIndex;
+ bufferArgMap[bufferOp] = argIndex;
+ ++argIndex;
+ }
+
+ auto memrefType = bufferOp.getType().dyn_cast_or_null<MemRefType>();
+ inputMemRefTypes.push_back(memrefType);
+ auto elementType = typeConverter.convertType(memrefType.getElementType())
+ .dyn_cast<LLVM::LLVMType>();
+ if (!elementType) return failure();
+ inputStructPtrs.push_back(
+ elementType.getPointerTo(memrefType.getMemorySpace()));
+ }
+
+ TypeConverter::SignatureConversion signatureConverter(/*numOrigInputs=*/0);
+
+ // func foo(%packed_buffer_args: !llvm<i8**>, %push_constant: !llvm<i64*>)
+ auto packedBuffersArgsTy =
+ LLVM::LLVMType::getInt8PtrTy(typeConverter.getDialect()).getPointerTo();
+ auto pushConstantArgTy =
+ LLVM::LLVMType::getInt64Ty(typeConverter.getDialect()).getPointerTo();
+ signatureConverter.addInputs(packedBuffersArgsTy);
+ signatureConverter.addInputs(pushConstantArgTy);
+
+ // Create the new function's signature.
+ Location loc = funcOp.getLoc();
+ auto newFuncOp = rewriter.create<FuncOp>(
+ loc, funcOp.getName(),
+ rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
+ llvm::None),
+ ArrayRef<NamedAttribute>());
+
+ // Move all ops in the old function's region to the new function.
+ rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
+ newFuncOp.end());
+ rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
+
+ auto builder = OpBuilder::atBlockBegin(&(newFuncOp.getBlocks().front()));
+
+ // Cast and unpack input packed_buffer_arguments and construct memref
+ // descriptors.
+ Value packedBuffersArgsPtr = builder.create<LLVM::BitcastOp>(
+ loc,
+ LLVM::LLVMType::getStructTy(typeConverter.getDialect(), inputStructPtrs)
+ .getPointerTo(),
+ newFuncOp.getArgument(0));
+ Value packedBuffersArgs =
+ builder.create<LLVM::LoadOp>(loc, packedBuffersArgsPtr);
+ for (auto bufferOp : bufferOps) {
+ MemRefType memrefType = bufferOp.getType().dyn_cast_or_null<MemRefType>();
+ if (!memrefType) return failure();
+ const auto index = bufferArgMap[bufferOp];
+ Value bufferPtr = builder.create<LLVM::ExtractValueOp>(
+ loc, inputStructPtrs[index], packedBuffersArgs,
+ rewriter.getI64ArrayAttr(index));
+ if (memrefType.hasStaticShape()) {
+ auto desc = MemRefDescriptor::fromStaticShape(
+ builder, loc, typeConverter, memrefType, bufferPtr);
+ rewriter.replaceOp(bufferOp, {desc});
+ } else {
+ auto desc = MemRefDescriptor::undef(
+ builder, loc, typeConverter.convertType(memrefType));
+ desc.setAllocatedPtr(builder, loc, bufferPtr);
+ desc.setAlignedPtr(builder, loc, bufferPtr);
+ rewriter.replaceOp(bufferOp, {desc});
+ }
+ }
+
+ // Lower hal.interface.load.constant ops into llvm.getelementptr, llvm.load
+ for (auto loadOp : loadOps) {
+ Value offset = builder.create<LLVM::ConstantOp>(
+ loc, LLVM::LLVMType::getInt64Ty(typeConverter.getDialect()),
+ builder.getI64IntegerAttr(loadOp.offset().getZExtValue()));
+ Value constPtr = builder.create<LLVM::GEPOp>(loc, pushConstantArgTy,
+ newFuncOp.getArgument(1),
+ ArrayRef<Value>({offset}));
+ Value dimConstant = builder.create<LLVM::LoadOp>(loc, constPtr);
+ rewriter.replaceOp(loadOp, dimConstant);
+ }
+
+ rewriter.eraseOp(funcOp);
+ return success();
+ }
+};
+
+class RemoveInterfaceOpPattern : public ConvertToLLVMPattern {
+ public:
+ explicit RemoveInterfaceOpPattern(MLIRContext *context,
+ LLVMTypeConverter &typeconverter)
+ : ConvertToLLVMPattern(IREE::HAL::InterfaceOp::getOperationName(),
+ context, typeconverter) {}
+ LogicalResult matchAndRewrite(
+ Operation *op, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const override {
+ rewriter.eraseOp(op);
+ return success();
+ }
+};
+
namespace {
struct ConvertToLLVMPass
: public PassWrapper<ConvertToLLVMPass, OperationPass<ModuleOp>> {
@@ -133,9 +309,25 @@
} // namespace
void ConvertToLLVMPass::runOnOperation() {
+ // Vector -> Vector transformation is needed before we do any conversion to
+ // LLVM.
+ {
+ OwningRewritePatternList patterns;
+ vector::populateVectorToVectorCanonicalizationPatterns(patterns,
+ &getContext());
+ vector::populateVectorSlicesLoweringPatterns(patterns, &getContext());
+ vector::populateVectorContractLoweringPatterns(patterns, &getContext());
+ applyPatternsAndFoldGreedily(getOperation(), patterns);
+ }
+ //
auto module = getOperation();
- OwningRewritePatternList patterns;
+
LLVMTypeConverter converter(&getContext());
+ converter.addConversion([](Shape::RankedShapeType, SmallVectorImpl<Type> &) {
+ return success();
+ });
+
+ OwningRewritePatternList patterns;
populateAffineToStdConversionPatterns(patterns, &getContext());
populateLoopToStdConversionPatterns(patterns, &getContext());
populateExpandTanhPattern(patterns, &getContext());
@@ -145,11 +337,12 @@
populateVectorToLLVMConversionPatterns(converter, patterns);
populateLinalgToLLVMConversionPatterns(converter, patterns, &getContext());
// The following patterns resolves dynamic shapes by substituting tie_shape
- // ops with an updated memref descriptors and replacing RankDimOp with actual
- // index loaded from memref<?xi32> that holds all dynamic shapes
- // push constants.
- patterns.insert<ConvertRankedDimPattern, ConvertTieShapePattern,
- RemoveMakeRankedShape>(&getContext(), converter);
+ // ops with an updated memref descriptors and replacing RankDimOp with
+ // actual index loaded from memref<?xi32> that holds all dynamic shapes push
+ // constants.
+ patterns.insert<ConvertFuncWithHALInterface, ConvertRankedDimPattern,
+ ConvertTieShapePattern, RemoveMakeRankedShape,
+ RemoveInterfaceOpPattern>(&getContext(), converter);
LLVMConversionTarget target(getContext());
target.addLegalOp<ModuleOp, ModuleTerminatorOp>();
if (failed(applyPartialConversion(module, target, patterns)))
@@ -162,7 +355,8 @@
static PassRegistration<ConvertToLLVMPass> pass(
"iree-codegen-convert-to-llvm",
- "Perform final conversion from Linalg/HAL/Shape/Vector/Standard to LLVMIR "
+ "Perform final conversion from Linalg/HAL/Shape/Vector/Standard to "
+ "LLVMIR "
"dialect",
[] { return std::make_unique<ConvertToLLVMPass>(); });
diff --git a/iree/compiler/Conversion/LinalgToLLVM/HALInterfaceToMemrefArguments.cpp b/iree/compiler/Conversion/LinalgToLLVM/HALInterfaceToMemrefArguments.cpp
deleted file mode 100644
index ac968e4..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/HALInterfaceToMemrefArguments.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <memory>
-
-#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/IREE/IR/IREEOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace {
-
-/// Returns true if the given function contains interface related operations
-/// that are used by other ops.
-bool containsUsedInterfaceOp(FuncOp funcOp) {
- for (Block& block : funcOp.getBlocks()) {
- for (Operation& op : block) {
- if (!op.getUses().empty() &&
- (isa<IREE::PlaceholderOp>(op) ||
- isa<IREE::HAL::InterfaceLoadConstantOp>(op))) {
- return true;
- }
- }
- }
- return false;
-}
-
-/// Returns true if `aOp` has a desciptor (set, binding) pair smaller than
-/// `bOp`. Note that this ignores the offset.
-bool operator<(IREE::HAL::InterfaceBindingOp aOp,
- IREE::HAL::InterfaceBindingOp bOp) {
- if (aOp.set().getZExtValue() == bOp.set().getZExtValue())
- return aOp.binding().getZExtValue() < bOp.binding().getZExtValue();
- return aOp.set().getZExtValue() < bOp.set().getZExtValue();
-}
-
-/// A pattern to process function interface. It replaces interface related ops
-/// with function arguments to match LLVM's CodeGen's ABI contract.
-///
-/// IREE scheduler passes interface ABI information via hal.interface.* ops to
-/// all backends. We create iree.placeholder ops to represent buffers behind
-/// those hal.interface.* ops. However the LLVM CodeGen uses function parameters
-/// and memref descriptors for ABI. So we need to bridge the gap somewhere.
-///
-/// This pass finds all interface buffers used in the function, sort them
-/// according to the descriptor (set, binding) pair, and put unique ones as
-/// function parameters in order.
-/// Note: This should be kept consistent with LLVM's HAL backend.
-struct ProcessFuncInterfacePattern : public OpConversionPattern<FuncOp> {
- using OpConversionPattern::OpConversionPattern;
- LogicalResult matchAndRewrite(
- FuncOp funcOp, ArrayRef<Value> Operands,
- ConversionPatternRewriter& rewriter) const override {
- // Only process entry functions.
- if (SymbolTable::getSymbolVisibility(funcOp) !=
- SymbolTable::Visibility::Public)
- return failure();
-
- FunctionType fnType = funcOp.getType();
- if (fnType.getNumInputs() != 0)
- return rewriter.notifyMatchFailure(
- funcOp, "entry function should not have inputs");
-
- // Get interface buffers from all the blocks.
- SmallVector<IREE::PlaceholderOp, 8> bufferOps;
- SmallVector<IREE::HAL::InterfaceLoadConstantOp, 8> loadOps;
- for (Block& block : funcOp.getBlocks()) {
- for (Operation& op : block) {
- if (auto phOp = dyn_cast<IREE::PlaceholderOp>(op))
- bufferOps.push_back(phOp);
- if (auto phOp = dyn_cast<IREE::HAL::InterfaceLoadConstantOp>(op)) {
- loadOps.push_back(phOp);
- }
- }
- }
-
- if (bufferOps.empty()) return failure();
-
- // A map from buffer ops to their corresponding interface binding ops.
- llvm::DenseMap<Operation*, IREE::HAL::InterfaceBindingOp> bufferBindingMap;
- for (auto bufferOp : bufferOps) {
- auto symbol = SymbolTable::lookupNearestSymbolFrom(
- bufferOp, bufferOp.getAttrOfType<SymbolRefAttr>("binding"));
- bufferBindingMap[bufferOp] = cast<IREE::HAL::InterfaceBindingOp>(symbol);
- }
-
- // Sort buffers according to their descriptor (set, binding) pair.
- llvm::sort(bufferOps, [&bufferBindingMap](IREE::PlaceholderOp aBuffer,
- IREE::PlaceholderOp bBuffer) {
- return bufferBindingMap[aBuffer] < bufferBindingMap[bBuffer];
- });
-
- // Create a function argument for each of the unique binding pointed by the
- // buffer ops.
- TypeConverter::SignatureConversion signatureConverter(/*numOrigInputs=*/0);
- // A map from buffer ops to their corresponding function argument indices.
- llvm::DenseMap<Operation*, unsigned> bufferArgMap;
- // A map from binding ops to their corresponding function argument indices.
- llvm::DenseMap<Operation*, unsigned> bindingArgMap;
- unsigned argIndex = 0;
- for (auto bufferOp : bufferOps) {
- auto binding = bufferBindingMap[bufferOp];
- auto it = bindingArgMap.find(binding);
- if (it != bindingArgMap.end()) {
- bufferArgMap[bufferOp] = it->second;
- } else {
- bindingArgMap[binding] = argIndex;
- bufferArgMap[bufferOp] = argIndex;
- signatureConverter.addInputs(bufferOp.getType());
- ++argIndex;
- }
- }
- Type dynamicDimsBufferType =
- MemRefType::get(ShapedType::kDynamicSize, rewriter.getIntegerType(32));
- signatureConverter.addInputs(dynamicDimsBufferType);
-
- // Create the new function's signature.
- Location loc = funcOp.getLoc();
- auto newFuncOp = rewriter.create<FuncOp>(
- loc, funcOp.getName(),
- rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
- llvm::None),
- ArrayRef<NamedAttribute>());
- newFuncOp.setAttr("llvm.emit_c_interface",
- mlir::UnitAttr::get(funcOp.getContext()));
-
- // Move all ops in the old function's region to the new function.
- rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
- newFuncOp.end());
- rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
-
- // Replace all buffer ops' uses with the newly created function arguments
- // and erase them.
- for (auto bufferOp : bufferOps) {
- bufferOp.replaceAllUsesWith(
- newFuncOp.getArgument(bufferArgMap[bufferOp]));
-
- rewriter.eraseOp(bufferOp);
- }
-
- // Lower all hal.interface.load.constant ops into std.load
- // from the last buffer holding all dynamic dimensions with the proper
- // offset.
- Type indexType = rewriter.getIndexType();
- auto builder = OpBuilder::atBlockBegin(&(newFuncOp.getBlocks().front()));
- auto newLoc = newFuncOp.front().front().getLoc();
- for (auto loadOp : loadOps) {
- SmallVector<Value, 1> indices;
- Value constantOffset = builder.create<ConstantOp>(
- newLoc, indexType,
- rewriter.getIntegerAttr(indexType, loadOp.offset().getZExtValue()));
- indices.push_back(constantOffset);
- Value loadDim = builder.create<LoadOp>(
- newLoc, newFuncOp.getArgument(newFuncOp.getNumArguments() - 1),
- indices);
- Value loadDimIndex =
- builder.create<IndexCastOp>(newLoc, loadDim, indexType);
- loadOp.replaceAllUsesWith(loadDimIndex);
- rewriter.eraseOp(loadOp);
- }
- rewriter.eraseOp(funcOp);
- return success();
- }
-};
-
-struct RemoveInterfaceOpPattern
- : public OpRewritePattern<IREE::HAL::InterfaceOp> {
- using OpRewritePattern::OpRewritePattern;
- LogicalResult matchAndRewrite(IREE::HAL::InterfaceOp interfaceOp,
- PatternRewriter& rewriter) const override {
- rewriter.eraseOp(interfaceOp);
- return success();
- }
-};
-
-/// Converting from Linalg to LLVM needs to run on a module and since it
-/// applies a full conversion, make a module with jst the impl function.
-struct HALInterfaceToMemrefArgumentsPass
- : PassWrapper<HALInterfaceToMemrefArgumentsPass, OperationPass<ModuleOp>> {
- void runOnOperation() override {
- MLIRContext& context = getContext();
-
- OwningRewritePatternList patterns;
- patterns.insert<ProcessFuncInterfacePattern>(&context);
- patterns.insert<RemoveInterfaceOpPattern>(&context);
-
- ConversionTarget target(context);
- // Convert the interface related ops away.
- target.addDynamicallyLegalOp<FuncOp>(
- [](FuncOp funcOp) { return !containsUsedInterfaceOp(funcOp); });
- target.addIllegalOp<IREE::PlaceholderOp>();
- target.addIllegalDialect<IREE::HAL::HALDialect>();
- // Allow the rest.
- target.markUnknownOpDynamicallyLegal([](Operation*) { return true; });
-
- if (failed(applyFullConversion(getOperation(), target, patterns)))
- return signalPassFailure();
- }
-};
-
-} // namespace
-
-std::unique_ptr<OperationPass<ModuleOp>>
-createHALInterfaceToMemrefArgumentsPass() {
- return std::make_unique<HALInterfaceToMemrefArgumentsPass>();
-}
-
-static PassRegistration<HALInterfaceToMemrefArgumentsPass> pass(
- "iree-codegen-hal-interface-to-memref-arguments-pass",
- "Convert a function with HAL bindings interface to memref arguments",
- [] { return std::make_unique<HALInterfaceToMemrefArgumentsPass>(); });
-
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/MatMulVectorization.cpp b/iree/compiler/Conversion/LinalgToLLVM/MatMulVectorization.cpp
new file mode 100644
index 0000000..a36358d
--- /dev/null
+++ b/iree/compiler/Conversion/LinalgToLLVM/MatMulVectorization.cpp
@@ -0,0 +1,98 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "iree/compiler/Conversion/CodegenUtils/MatmulCodegenStrategy.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+namespace mlir {
+namespace iree_compiler {
+
+static llvm::cl::opt<int> l1TileSize(
+ "iree-codegen-linalg-to-llvm-matmul-l1-tile-size",
+ llvm::cl::desc("Specify the size of L1 tile for matmul vector lowering"),
+ llvm::cl::init(4));
+
+static llvm::cl::opt<int> l2TileSize(
+ "iree-codegen-linalg-to-llvm-matmul-l2-tile-size",
+ llvm::cl::desc("Specify the size of L2 tile for matmul vector lowering"),
+ llvm::cl::init(32));
+
+static llvm::cl::opt<int> l3TileSize(
+ "iree-codegen-linalg-to-llvm-matmul-l3-tile-size",
+ llvm::cl::desc("Specify the size of L3 tile for matmul vector lowering"),
+ llvm::cl::init(64));
+
+static llvm::cl::opt<bool> unrollVectorTransfer(
+ "iree-codegen-linalg-to-llvm-matmul-unroll-vector-transfer",
+ llvm::cl::desc("If true vector transfers operation loop get unrolled."),
+ llvm::cl::init(true));
+
+static llvm::cl::opt<std::string> vectorOpLowering(
+ "iree-codegen-linalg-to-llvm-matmul-vector-op-lowerig",
+ llvm::cl::desc(
+ "Select the vector operation for lowering linalg.matmul, options : "
+ "{'outer_product', 'vector_contract', 'matrix_internsics'}"),
+ llvm::cl::init("outer_product"));
+
+namespace {
+struct MatMulTileAndVectorizePass
+ : PassWrapper<MatMulTileAndVectorizePass, FunctionPass> {
+ void runOnFunction() override;
+};
+} // namespace
+
+void MatMulTileAndVectorizePass::runOnFunction() {
+ FuncOp fn = getFunction();
+ MatmulCodegenStrategy strategy;
+ strategy
+ .tile<linalg::MatmulOp>(linalg::LinalgTilingOptions().setTileSizes(
+ {l3TileSize, l3TileSize, l3TileSize}))
+ .tile<linalg::MatmulOp>(linalg::LinalgTilingOptions().setTileSizes(
+ {l2TileSize, l2TileSize, l2TileSize}))
+ .tile<linalg::MatmulOp>(linalg::LinalgTilingOptions().setTileSizes(
+ {l1TileSize, l1TileSize, l1TileSize}))
+ .vectorize<linalg::MatmulOp>()
+ .setVectorTransferToSCFOptions(
+ VectorTransferToSCFOptions().setUnroll(unrollVectorTransfer));
+ if (vectorOpLowering == "outer_product") {
+ strategy.setVectorTransformsOptions(
+ vector::VectorTransformsOptions().setVectorTransformsOptions(
+ vector::VectorContractLowering::OuterProduct));
+ } else if (vectorOpLowering == "vector_contract") {
+ strategy.setVectorTransformsOptions(
+ vector::VectorTransformsOptions().setVectorTransformsOptions(
+ vector::VectorContractLowering::OuterProduct));
+ } else if (vectorOpLowering == "matrix_internsics") {
+ strategy.setVectorTransformsOptions(
+ vector::VectorTransformsOptions().setVectorTransformsOptions(
+ vector::VectorContractLowering::OuterProduct));
+ } else {
+ signalPassFailure();
+ }
+ strategy.setDefaultCPULowering();
+ strategy.transform(fn);
+}
+
+std::unique_ptr<FunctionPass> createMatMulTileAndVectorizePass() {
+ return std::make_unique<MatMulTileAndVectorizePass>();
+}
+
+static PassRegistration<MatMulTileAndVectorizePass> pass(
+ "iree-codegen-linalg-to-llvm-matmul-vectorization-pass",
+ "Tile and vectorize linalg.matmul operation",
+ [] { return std::make_unique<MatMulTileAndVectorizePass>(); });
+
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
index e8c6d9c..8631cdf 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -25,6 +25,8 @@
namespace iree_compiler {
void addLinalgToLLVMPasses(OpPassManager &passManager) {
+  // Linalg -> Vector ops.
+ passManager.addPass(createMatMulTileAndVectorizePass());
// Linalg -> SCF
passManager.addPass(createConvertLinalgToLoopsPass());
passManager.addPass(createCanonicalizerPass());
@@ -35,10 +37,7 @@
passManager.addPass(createCanonicalizerPass());
passManager.addPass(createCSEPass());
- // Convert ExecuableOp entry function to use memref arguments.
- passManager.addPass(createHALInterfaceToMemrefArgumentsPass());
-
- // (Linalg, STD) -> LLVM
+ // (HAL, IREE, Linalg, STD) -> LLVM
// OpPassManager& llvmPassManager = passManager.nest<ModuleOp>();
passManager.addPass(createConvertToLLVMPass());
passManager.addPass(createCanonicalizerPass());
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.h b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
index 5bfb893..2a4db8c 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.h
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
@@ -20,10 +20,8 @@
namespace mlir {
namespace iree_compiler {
-/// Converts function signture type from hal interface op annotation to memref
-/// argument.
-std::unique_ptr<OperationPass<ModuleOp>>
-createHALInterfaceToMemrefArgumentsPass();
+/// Tiles and vectorizes linalg::MatmulOp (see the pass registration: "Tile and
+/// vectorize linalg.matmul operation"); lowering to LLVM happens later.
+std::unique_ptr<FunctionPass> createMatMulTileAndVectorizePass();
/// Pass to perform final conversion to LLVM dialect.
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass();
diff --git a/iree/compiler/Conversion/LinalgToLLVM/test/convert_to_llvm.mlir b/iree/compiler/Conversion/LinalgToLLVM/test/convert_to_llvm.mlir
index c8bf6d8..0a8034f 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/test/convert_to_llvm.mlir
+++ b/iree/compiler/Conversion/LinalgToLLVM/test/convert_to_llvm.mlir
@@ -1,24 +1,36 @@
// RUN: iree-opt -iree-codegen-convert-to-llvm -split-input-file %s | IreeFileCheck %s
-// CHECK-LABEL: func @convert_dynamic_shape
-func @convert_dynamic_shape(%arg0: memref<?x?xf32>, %arg1: memref<2xi32>){
- %c0 = constant 0 : index
- %c1 = constant 1 : index
- %0 = load %arg1[%c0] : memref<2xi32>
- %1 = index_cast %0 : i32 to index
- %3 = load %arg1[%c1] : memref<2xi32>
- %4 = index_cast %3 : i32 to index
- %5 = shapex.make_ranked_shape %1, %4 : (index, index) -> !shapex.ranked_shape<[?,?]>
- %6 = shapex.tie_shape %arg0, %5 : memref<?x?xf32>, !shapex.ranked_shape<[?,?]>
- return
+func @convert_dynamic_shape() -> f32 {
+ %c0 = constant 0 : index
+ %0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<?x?xf32>
+ %1 = hal.interface.load.constant offset = 0 : index
+ %2 = hal.interface.load.constant offset = 1 : index
+ %3 = shapex.make_ranked_shape %1, %2 : (index, index) -> !shapex.ranked_shape<[?,?]>
+ %6 = shapex.tie_shape %0, %3 : memref<?x?xf32>, !shapex.ranked_shape<[?,?]>
+ %7 = load %6[%c0, %c0] : memref<?x?xf32>
+ return %7 : f32
}
-// CHECK: %[[DIM0:.+]] = llvm.sext
-// CHECK: %[[DIM1:.+]] = llvm.sext
-// CHECK: llvm.insertvalue %[[DIM0]], %[[MEMREFBASEPTR:.+]][3, 0]
-// CHECK: %[[MEMREFBASEPTR_1:.+]] = llvm.insertvalue %[[DIM1]], %[[MEMREFBASEPTR:.+]][3, 1]
-// CHECK: %[[STRIDE1:.+]] = llvm.mlir.constant(1 : index) : !llvm.i64
-// CHECK: %[[MEMREFBASEPTR_2:.+]] = llvm.insertvalue %[[STRIDE1]], %[[MEMREFBASEPTR_1]][4, 1]
-// CHECK: %[[ESTRIDE1:.+]] = llvm.extractvalue %[[MEMREFBASEPTR_2]][4, 1]
-// CHECK: %[[EDIM1:.+]] = llvm.extractvalue %[[MEMREFBASEPTR_2]][3, 1]
-// CHECK: %[[STRIDE0:.+]] = llvm.mul %[[ESTRIDE1]], %[[EDIM1]] : !llvm.i64
-// CHECK: llvm.insertvalue %[[STRIDE0]], %[[MEMREFBASEPTR_2]][4, 0]
\ No newline at end of file
+hal.interface @legacy_io attributes {push_constants = 2 : i32, sym_visibility = "private"} {
+ hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
+}
+// CHECK: llvm.func @convert_dynamic_shape(%[[ARG0:.+]]: !llvm<"i8**">, %[[ARG1:.+]]: !llvm<"i64*">)
+// CHECK: %[[PACKED_ARGS_PTR:.+]] = llvm.bitcast %[[ARG0]] : !llvm<"i8**"> to !llvm<"{ float* }*">
+// CHECK: %[[PACKED_ARGS:.+]] = llvm.load %[[PACKED_ARGS_PTR]] : !llvm<"{ float* }*">
+// CHECK: %[[MEMREF0_DATA_PTR:.+]] = llvm.extractvalue %[[PACKED_ARGS]][0] : !llvm<"{ float* }">
+// CHECK: %[[MEMREF0:.+]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[MEMREF0_0:.+]] = llvm.insertvalue %[[MEMREF0_DATA_PTR]], %[[MEMREF0]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[MEMREF0_1:.+]] = llvm.insertvalue %[[MEMREF0_DATA_PTR]], %[[MEMREF0_0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[CONST0:.+]] = llvm.mlir.constant(0 : i64) : !llvm.i64
+// CHECK: %[[DIM0_PTR:.+]] = llvm.getelementptr %[[ARG1]][%[[CONST0]]] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
+// CHECK: %[[DIM0:.+]] = llvm.load %[[DIM0_PTR]] : !llvm<"i64*">
+// CHECK: %[[CONST1:.+]] = llvm.mlir.constant(1 : i64) : !llvm.i64
+// CHECK: %[[DIM1_PTR:.+]] = llvm.getelementptr %[[ARG1]][%[[CONST1]]] : (!llvm<"i64*">, !llvm.i64) -> !llvm<"i64*">
+// CHECK: %[[DIM1:.+]] = llvm.load %[[DIM1_PTR]] : !llvm<"i64*">
+// CHECK: %[[MEMREF0_2:.+]] = llvm.insertvalue %[[DIM0]], %[[MEMREF0_1]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[MEMREF0_3:.+]] = llvm.insertvalue %[[DIM1]], %[[MEMREF0_2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[CONST1_STRIDE:.+]] = llvm.mlir.constant(1 : index) : !llvm.i64
+// CHECK: %[[MEMREF0_4:.+]] = llvm.insertvalue %[[CONST1_STRIDE]], %[[MEMREF0_3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[STRIDE_DIM1:.+]] = llvm.extractvalue %[[MEMREF0_4]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[DIM1_0:.+]] = llvm.extractvalue %[[MEMREF0_4]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[STRIDE_DIM0:.+]] = llvm.mul %[[STRIDE_DIM1]], %[[DIM1_0]] : !llvm.i64
+// CHECK: llvm.insertvalue %[[STRIDE_DIM0]], %[[MEMREF0_4]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
diff --git a/iree/compiler/Conversion/LinalgToLLVM/test/matmul_vectorization.mlir b/iree/compiler/Conversion/LinalgToLLVM/test/matmul_vectorization.mlir
new file mode 100644
index 0000000..7d47f47
--- /dev/null
+++ b/iree/compiler/Conversion/LinalgToLLVM/test/matmul_vectorization.mlir
@@ -0,0 +1,31 @@
+// RUN: iree-opt --iree-codegen-linalg-to-llvm-matmul-vectorization-pass -split-input-file %s | IreeFileCheck %s
+
+// CHECK-LABEL: func @matmul_128x128x128
+// CHECK-SAME: (%[[ARG0:.+]]: memref<128x128xf32>, %[[ARG1:.+]]: memref<128x128xf32>, %[[ARG2:.+]]: memref<128x128xf32>)
+func @matmul_128x128x128(%arg0 : memref<128x128xf32>, %arg1: memref<128x128xf32>, %arg2: memref<128x128xf32>) {
+ linalg.matmul %arg0, %arg1, %arg2 : (memref<128x128xf32>, memref<128x128xf32>, memref<128x128xf32>)
+ return
+}
+// CHECK: %[[L3END:.+]] = constant 128 : index
+// CHECK: %[[L3STEP:.+]] = constant 64 : index
+// CHECK: %[[L1STEP:.+]] = constant 4 : index
+// CHECK: %[[L2STEP:.+]] = constant 32 : index
+// CHECK: %[[START:.+]] = constant 0 : index
+// CHECK: scf.for %[[IL3:.+]] = %[[START]] to %[[L3END]] step %[[L3STEP]]
+// CHECK: scf.for %[[JL3:.+]] = %[[START]] to %[[L3END]] step %[[L3STEP]]
+// CHECK: scf.for %[[KL3:.+]] = %[[START]] to %[[L3END]] step %[[L3STEP]]
+// CHECK: %[[ARG0_TILE_L3:.+]] = subview %[[ARG0]][%[[IL3]], %[[KL3]]] [64, 64] [1, 1] : memref<128x128xf32> to memref<64x64xf32
+// CHECK: %[[ARG1_TILE_L3:.+]] = subview %[[ARG1]][%[[KL3]], %[[JL3]]] [64, 64] [1, 1] : memref<128x128xf32> to memref<64x64xf32
+// CHECK: %[[ARG2_TILE_L3:.+]] = subview %[[ARG2]][%[[IL3]], %[[JL3]]] [64, 64] [1, 1] : memref<128x128xf32> to memref<64x64xf32
+// CHECK: scf.for %[[IL2:.+]] = %[[START]] to %[[L3STEP]] step %[[L2STEP]]
+// CHECK: scf.for %[[JL2:.+]] = %[[START]] to %[[L3STEP]] step %[[L2STEP]]
+// CHECK: scf.for %[[KL2:.+]] = %[[START]] to %[[L3STEP]] step %[[L2STEP]]
+// CHECK: %[[ARG0_TILE_L2:.+]] = subview %[[ARG0_TILE_L3]][%[[IL2]], %[[KL2]]] [32, 32] [1, 1] : memref<64x64xf32
+// CHECK: %[[ARG1_TILE_L2:.+]] = subview %[[ARG1_TILE_L3]][%[[KL2]], %[[JL2]]] [32, 32] [1, 1] : memref<64x64xf32
+// CHECK: %[[ARG2_TILE_L2:.+]] = subview %[[ARG2_TILE_L3]][%[[IL2]], %[[JL2]]] [32, 32] [1, 1] : memref<64x64xf32
+// CHECK: scf.for %[[IL1:.+]] = %[[START]] to %[[L2STEP]] step %[[L1STEP]]
+// CHECK: scf.for %[[JL1:.+]] = %[[START]] to %[[L2STEP]] step %[[L1STEP]]
+// CHECK: scf.for %[[KL1:.+]] = %[[START]] to %[[L2STEP]] step %[[L1STEP]]
+// CHECK: %[[ARG0_TILE_L1:.+]] = subview %[[ARG0_TILE_L2]][%[[IL1]], %[[KL1]]] [4, 4] [1, 1] : memref<32x32xf32
+// CHECK: %[[ARG1_TILE_L1:.+]] = subview %[[ARG1_TILE_L2]][%[[KL1]], %[[JL1]]] [4, 4] [1, 1] : memref<32x32xf32
+// CHECK: %[[ARG2_TILE_L1:.+]] = subview %[[ARG2_TILE_L2]][%[[IL1]], %[[JL1]]] [4, 4] [1, 1] : memref<32x32xf32
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp
index 98e91fa..81514ec 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp
+++ b/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp
@@ -563,7 +563,7 @@
ConversionPatternRewriter &rewriter) const override {
// Check for marker that specifies that the linalg op is to be partitioned
// across threads within a workgroup.
- if (!hasWorkItemMarker(linalgOp)) return failure();
+ if (!hasWorkGroupMarker(linalgOp)) return failure();
Optional<linalg::LinalgLoops> loops =
linalg::linalgLowerOpToLoops<scf::ParallelOp>(rewriter, linalgOp);
if (!loops) return failure();
@@ -587,7 +587,7 @@
LogicalResult matchAndRewrite(
LinalgOpTy linalgOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
- if (!hasWorkItemMarker(linalgOp)) return failure();
+ if (!hasWorkGroupMarker(linalgOp)) return failure();
Optional<linalg::LinalgLoops> loops =
linalg::linalgLowerOpToLoops<scf::ParallelOp>(rewriter, linalgOp);
if (!loops) return failure();
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndFusePass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndFusePass.cpp
index e9dddd6..934e5ae 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndFusePass.cpp
+++ b/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndFusePass.cpp
@@ -314,7 +314,7 @@
LogicalResult matchAndRewrite(Operation *op,
PatternRewriter &rewriter) const override {
- if (!hasWorkItemMarker(op)) return failure();
+ if (!hasWorkGroupMarker(op)) return failure();
return linalg::LinalgPromotionPattern<linalg::MatmulOp>::matchAndRewrite(
op, rewriter);
}
@@ -365,7 +365,7 @@
.setLoopType(linalg::LinalgTilingLoopType::ParallelLoops),
tileSizeCalculator.getWorkGroupSize(),
linalg::LinalgMarker(ArrayRef<Identifier>(),
- Identifier::get(getWorkItemMarker(), context)));
+ Identifier::get(getWorkGroupMarker(), context)));
applyPatternsAndFoldGreedily(getOperation(), tilingPatterns);
if (useWorkgroupMemory) {
@@ -385,7 +385,7 @@
[&](OpBuilder &b, Value src, Value dst) -> LogicalResult {
return copyToFromWorkgroupMemory(b, src, dst);
}),
- linalg::LinalgMarker(Identifier::get(getWorkItemMarker(), context),
+ linalg::LinalgMarker(Identifier::get(getWorkGroupMarker(), context),
Identifier::get(PromotionMarker, context)));
applyPatternsAndFoldGreedily(getOperation(), promotionPatterns);
}
@@ -394,7 +394,7 @@
OpBuilder builder(context);
funcOp.walk([&builder](linalg::LinalgOp linalgOp) {
if (hasMarker(linalgOp, PromotionMarker)) {
- setWorkItemMarker(linalgOp);
+ setWorkGroupMarker(linalgOp);
insertBarrierAfter(builder, linalgOp.getLoc(), linalgOp);
}
});
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.cpp b/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.cpp
index c874234..47747de 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.cpp
+++ b/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.cpp
@@ -36,8 +36,6 @@
StringRef getWorkGroupMarker() { return "workgroup"; }
-StringRef getWorkItemMarker() { return "workitem"; }
-
bool hasMarker(Operation *op, StringRef marker) {
return checkMarkerValue(op, marker);
}
@@ -46,10 +44,6 @@
return checkMarkerValue(op, getWorkGroupMarker());
}
-bool hasWorkItemMarker(Operation *op) {
- return checkMarkerValue(op, getWorkItemMarker());
-}
-
void setMarker(Operation *op, StringRef marker) {
op->setAttr(linalg::LinalgTransforms::kLinalgTransformMarker,
StringAttr::get(marker, op->getContext()));
@@ -57,6 +51,5 @@
void setWorkGroupMarker(Operation *op) { setMarker(op, getWorkGroupMarker()); }
-void setWorkItemMarker(Operation *op) { setMarker(op, getWorkItemMarker()); }
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.h b/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.h
index 36dccca..e512ead 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.h
+++ b/iree/compiler/Conversion/LinalgToSPIRV/MarkerUtils.h
@@ -31,7 +31,7 @@
namespace iree_compiler {
/// Marker to denote that a linalg operation is to be partitioned to workitems.
-StringRef getWorkItemMarker();
+StringRef getWorkGroupMarker();
/// Returns true if an operation has the specified `marker`. When `marker` is
/// empty, returns true if the operation has any marker.
@@ -39,14 +39,14 @@
/// Returns true if an operation has marker to denote that it is to be
/// partitioned to workitems.
-bool hasWorkItemMarker(Operation *);
+bool hasWorkGroupMarker(Operation *);
/// Sets a given marker on an operation.
void setMarker(Operation *, StringRef);
/// Sets marker to denote that a linalg operation is to be partitioned to
/// workitems.
-void setWorkItemMarker(Operation *);
+void setWorkGroupMarker(Operation *);
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
index 679f523..64621f3 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
@@ -162,7 +162,7 @@
%12 = dim %arg2, %c1 : memref<?x?xf32>
%13 = affine.min #map0(%arg4)[%12]
%14 = subview %arg2[%arg3, %arg4] [%11, %13] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map2>
- linalg.matmul %5, %9, %14 {__internal_linalg_transform__ = "workitem"} : (memref<?x?xf32, #map2>, memref<?x?xf32, #map2>, memref<?x?xf32, #map2>)
+ linalg.matmul %5, %9, %14 {__internal_linalg_transform__ = "workgroup"} : (memref<?x?xf32, #map2>, memref<?x?xf32, #map2>, memref<?x?xf32, #map2>)
}
scf.yield
}
@@ -235,7 +235,7 @@
%13 = affine.min #map5(%arg5)[%4]
%14 = dim %arg2, %c3 : memref<?x?x?x?xf32>
%15 = subview %arg2[%arg3, %arg4, %arg5, 0] [%11, %12, %13, %14] [1, 1, 1, 1] : memref<?x?x?x?xf32> to memref<?x?x?x?xf32, #map3>
- linalg.conv(%arg0, %9, %15) {__internal_linalg_transform__ = "workitem", dilations = [1, 1], strides = [1, 1]} : memref<?x?x?x?xf32>, memref<?x?x?x?xf32, #map3>, memref<?x?x?x?xf32, #map3>
+ linalg.conv(%arg0, %9, %15) {__internal_linalg_transform__ = "workgroup", dilations = [1, 1], strides = [1, 1]} : memref<?x?x?x?xf32>, memref<?x?x?x?xf32, #map3>, memref<?x?x?x?xf32, #map3>
scf.yield
}
return
@@ -364,7 +364,7 @@
%9 = affine.min #map3(%arg3)[%2]
%10 = affine.min #map4(%arg4)[%3]
%11 = subview %arg2[%arg3, %arg4] [%9, %10] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map2>
- linalg.pooling_max(%8, %arg1, %11) {__internal_linalg_transform__ = "workitem", dilations = [1, 1], strides = [1, 1]} : memref<?x?xf32, #map2>, memref<?x?xf32>, memref<?x?xf32, #map2>
+ linalg.pooling_max(%8, %arg1, %11) {__internal_linalg_transform__ = "workgroup", dilations = [1, 1], strides = [1, 1]} : memref<?x?xf32, #map2>, memref<?x?xf32>, memref<?x?xf32, #map2>
scf.yield
}
return
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu_option.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu_option.mlir
index 1701535..63f8aa5 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu_option.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu_option.mlir
@@ -32,7 +32,7 @@
%13 = affine.min #map5(%arg5)[%4]
%14 = dim %arg2, %c3 : memref<?x?x?x?xf32>
%15 = subview %arg2[%arg3, %arg4, %arg5, 0] [%11, %12, %13, %14] [1, 1, 1, 1] : memref<?x?x?x?xf32> to memref<?x?x?x?xf32, #map3>
- linalg.conv(%arg0, %9, %15) {__internal_linalg_transform__ = "workitem", dilations = [1, 1], strides = [1, 1]} : memref<?x?x?x?xf32>, memref<?x?x?x?xf32, #map3>, memref<?x?x?x?xf32, #map3>
+ linalg.conv(%arg0, %9, %15) {__internal_linalg_transform__ = "workgroup", dilations = [1, 1], strides = [1, 1]} : memref<?x?x?x?xf32>, memref<?x?x?x?xf32, #map3>, memref<?x?x?x?xf32, #map3>
scf.yield
}
return
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/cyclic_to_workgroup.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/cyclic_to_workgroup.mlir
index 110ac24..cac18ab 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/cyclic_to_workgroup.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/cyclic_to_workgroup.mlir
@@ -27,7 +27,7 @@
%12 = dim %arg2, %c1 : memref<?x?xf32>
%13 = affine.min #map0(%arg4)[%12]
%14 = subview %arg2[%arg3, %arg4] [%11, %13] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #map2>
- linalg.matmul %5, %9, %14 {__internal_linalg_transform__ = "workitem"} : (memref<?x?xf32, #map2>, memref<?x?xf32, #map2>, memref<?x?xf32, #map2>)
+ linalg.matmul %5, %9, %14 {__internal_linalg_transform__ = "workgroup"} : (memref<?x?xf32, #map2>, memref<?x?xf32, #map2>, memref<?x?xf32, #map2>)
}
scf.yield
}
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/linalg_tile_and_fuse.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/linalg_tile_and_fuse.mlir
index 0e2fe6d..1728d35 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/linalg_tile_and_fuse.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/linalg_tile_and_fuse.mlir
@@ -51,7 +51,7 @@
// CHECK: %[[VIEW2:.+]] = subview %[[ARG2]]
// CHECK: linalg.conv
// CHECK-SAME: %[[ARG0]], %[[VIEW1]], %[[VIEW2]]
-// CHECK-SAME: "workitem"
+// CHECK-SAME: "workgroup"
// -----
@@ -81,7 +81,7 @@
// CHECK: %[[VIEW1:.+]] = subview %[[ARG1]]
// CHECK: %[[VIEW2:.+]] = subview %[[ARG2]]
// CHECK: linalg.matmul
-// CHECK-SAME: "workitem"
+// CHECK-SAME: "workgroup"
// CHECK-SAME: %[[VIEW0]], %[[VIEW1]], %[[VIEW2]]
// -----
@@ -111,4 +111,4 @@
// CHECK: %[[VIEW2:.+]] = subview %[[ARG2]]
// CHECK: linalg.pooling_max
// CHECK-SAME: %[[VIEW0]], %[[ARG1]], %[[VIEW2]]
-// CHECK-SAME: "workitem"
+// CHECK-SAME: "workgroup"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/workgroup_memory_promotion.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/workgroup_memory_promotion.mlir
index 76cfcb8..a24c77b 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/workgroup_memory_promotion.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/workgroup_memory_promotion.mlir
@@ -36,12 +36,12 @@
// CHECK: %[[ALLOC2:.+]] = alloc(%[[C4]], %[[C8]]) : memref<?x?xf32, 3>
// CHECK: %[[SUBVIEW2:.+]] = subview %[[ALLOC2]]
// CHECK: linalg.copy(%[[ARG0SV]], %[[SUBVIEW1]])
-// CHECK-SAME: "workitem"
+// CHECK-SAME: "workgroup"
// CHECK: spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
// CHECK: linalg.copy(%[[ARG1SV]], %[[SUBVIEW2]])
-// CHECK-SAME: "workitem"
+// CHECK-SAME: "workgroup"
// CHECK: spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
-// CHECK: linalg.matmul {{.*}}"workitem"{{.*}} %[[SUBVIEW1]], %[[SUBVIEW2]], %[[RET0SV]]
+// CHECK: linalg.matmul {{.*}}"workgroup"{{.*}} %[[SUBVIEW1]], %[[SUBVIEW2]], %[[RET0SV]]
// CHECK: spv.ControlBarrier "Workgroup", "Workgroup", "AcquireRelease"
// CHECK-DAG: dealloc %[[ALLOC1]] : memref<?x?xf32, 3>
// CHECK-DAG: dealloc %[[ALLOC2]] : memref<?x?xf32, 3>
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 7a190e7..259e3d5 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -47,7 +47,6 @@
inline void registerLinalgToLLVMPasses() {
static bool init_once = []() {
// LinalgToLLVM
- createHALInterfaceToMemrefArgumentsPass();
return true;
}();
(void)init_once;
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td
index 23bce9c..13ad2f1 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -610,6 +610,17 @@
let hasFolder = 1;
}
+def FLOW_TensorTraceOp : FLOW_Op<"tensor.trace", []> {
+ let summary = [{trace value(s) operation}];
+ let description = [{
+ Trace point for dispatchable functions.
+ }];
+
+ let arguments = (ins Variadic<FLOW_Tensor>:$operands);
+
+ let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?";
+}
+
//===----------------------------------------------------------------------===//
// Streams
//===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
index 44a19d1..3281c6c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
@@ -25,6 +25,7 @@
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
@@ -40,6 +41,11 @@
namespace {
+static llvm::cl::opt<bool> traceDispatchTensors(
+ "iree-flow-trace-dispatch-tensors",
+ llvm::cl::desc("Trace input/output values for each dispatch function"),
+ llvm::cl::init(false));
+
// Converts a dispatch_region into a dispatch to the outlined region function.
LogicalResult convertToDispatchOp(DispatchRegionOp regionOp,
ExecutableOp executableOp,
@@ -57,11 +63,28 @@
return failure();
}
+ auto getTensorTypeArgs = [](auto args) {
+ SmallVector<Value, 4> res;
+ for (auto arg : args) {
+ if (arg.getType().template isa<TensorType>()) res.push_back(arg);
+ }
+ return res;
+ };
+ if (traceDispatchTensors) {
+ builder.create<TensorTraceOp>(regionOp.getLoc(),
+ getTensorTypeArgs(newArgs));
+ }
+
// Create the dispatch op to the executable function.
auto dispatchOp = builder.create<DispatchOp>(
regionOp.getLoc(), executableOp.getName(), entryPointOp.getName(),
regionOp.workload(), outlinedFuncOp.getType().getResults(), newArgs);
+ if (traceDispatchTensors) {
+ builder.create<TensorTraceOp>(regionOp.getLoc(),
+ getTensorTypeArgs(dispatchOp.getResults()));
+ }
+
// Replace uses of the existing results with the new results.
for (int i = 0; i < regionOp.getNumResults(); ++i) {
regionOp.getResult(i).replaceAllUsesWith(dispatchOp.getResult(i));
diff --git a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertTensorOps.cpp b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertTensorOps.cpp
index 2947b70..6fcadbd 100644
--- a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertTensorOps.cpp
+++ b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertTensorOps.cpp
@@ -19,6 +19,7 @@
#include "iree/compiler/Dialect/HAL/Utils/TypeUtils.h"
#include "iree/compiler/Dialect/IREE/IR/IREETypes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BlockAndValueMapping.h"
@@ -120,13 +121,36 @@
}
};
+class TensorTraceOpConversion
+ : public OpConversionPattern<IREE::Flow::TensorTraceOp> {
+ public:
+ TensorTraceOpConversion(MLIRContext *ctx, TypeConverter &converter)
+ : OpConversionPattern(ctx) {}
+
+ LogicalResult matchAndRewrite(
+ IREE::Flow::TensorTraceOp traceOp, llvm::ArrayRef<Value> rawOperands,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = traceOp.getLoc();
+ SmallVector<Value, 4> bufferViews;
+ for (auto operand : llvm::enumerate(rawOperands)) {
+ auto adaptor = IREE::HAL::TensorRewriteAdaptor::get(
+ loc, traceOp.getOperand(operand.index()), operand.value(), rewriter);
+ bufferViews.emplace_back(adaptor.getBufferView());
+ }
+ rewriter.replaceOpWithNewOp<IREE::HAL::BufferViewTraceOp>(traceOp,
+ bufferViews);
+ return success();
+ }
+};
+
} // namespace
void populateFlowTensorToHALPatterns(MLIRContext *context,
OwningRewritePatternList &patterns,
TypeConverter &converter) {
patterns.insert<ConstantTensorOpConversion, TensorLoadOpConversion,
- TensorStoreOpConversion>(context, converter);
+ TensorStoreOpConversion, TensorTraceOpConversion>(context,
+ converter);
}
} // namespace iree_compiler
diff --git a/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp b/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
index 7212549..5abbfa4 100644
--- a/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
+++ b/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertBufferViewOps.cpp
@@ -118,6 +118,8 @@
context, importSymbols, typeConverter, "hal.buffer_view.dim");
patterns.insert<BufferViewDimsOpConversion>(
context, importSymbols, typeConverter, "hal.buffer_view.dims");
+ patterns.insert<VMImportOpConversion<IREE::HAL::BufferViewTraceOp>>(
+ context, importSymbols, typeConverter, "hal.buffer_view.trace");
}
} // namespace iree_compiler
diff --git a/iree/compiler/Dialect/HAL/IR/HALOps.td b/iree/compiler/Dialect/HAL/IR/HALOps.td
index 0139ddb..9593647 100644
--- a/iree/compiler/Dialect/HAL/IR/HALOps.td
+++ b/iree/compiler/Dialect/HAL/IR/HALOps.td
@@ -927,6 +927,17 @@
let assemblyFormat = [{$buffer_view attr-dict `:` type($result)}];
}
+def HAL_BufferViewTraceOp : HAL_Op<"buffer_view.trace", []> {
+ let summary = [{trace value(s) operation}];
+ let description = [{
+ Trace point for dispatchable functions.
+ }];
+
+ let arguments = (ins Variadic<HAL_BufferView>:$operands);
+
+ let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?";
+}
+
//===----------------------------------------------------------------------===//
// iree::hal::CommandBuffer
//===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
index 2e322af..0f6be4a 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
@@ -105,9 +105,7 @@
cc_library(
name = "LLVMAOTTargetLinker",
hdrs = ["LLVMAOTTargetLinker.h"],
- deps = [
- "//iree/base:file_io",
- ] + platform_trampoline_deps("LLVMAOTTargetLinker", "compiler/Dialect/HAL/Target/LLVM"),
+ deps = platform_trampoline_deps("LLVMAOTTargetLinker", "compiler/Dialect/HAL/Target/LLVM"),
)
cc_library(
@@ -115,6 +113,6 @@
hdrs = ["LLVMAOTTargetLinker.h"],
deps = [
":LLVMTargetOptions",
- "//iree/base:file_io",
+ "//iree/base:status",
],
)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
index 4aa0ad3..0ee00e4 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
@@ -97,7 +97,6 @@
HDRS
"LLVMAOTTargetLinker.h"
DEPS
- iree::base::file_io
iree::compiler::Dialect::HAL::Target::LLVM::internal::LLVMAOTTargetLinker_internal
PUBLIC
)
@@ -109,6 +108,6 @@
"LLVMAOTTargetLinker.h"
DEPS
::LLVMTargetOptions
- iree::base::file_io
+ iree::base::status
PUBLIC
)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 7269089..8cb47b5 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -65,13 +65,10 @@
auto executableOp = cast<ExecutableOp>(targetOp.getParentOp());
auto entryPointOps =
executableOp.getBlock().getOps<ExecutableEntryPointOp>();
- const bool addCInterface = true;
+
for (auto entryPointOp : entryPointOps) {
- std::string funcName =
- addCInterface ? "_mlir_ciface_" + std::string(entryPointOp.sym_name())
- : std::string(entryPointOp.sym_name());
- dyLibExecutableDef.entry_points.push_back("invoke_" + funcName);
- createLLVMInvocationFunc(funcName, llvmModule.get());
+ dyLibExecutableDef.entry_points.push_back(
+ std::string(entryPointOp.sym_name()));
}
// LLVMIR opt passes.
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTargetLinker.h b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTargetLinker.h
index 764ad02..669f17c 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTargetLinker.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTargetLinker.h
@@ -18,7 +18,7 @@
#include <string>
-#include "iree/base/file_io.h"
+#include "iree/base/status.h"
#include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.h"
namespace mlir {
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
index cb2a526..e91441d 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
@@ -44,44 +44,6 @@
return machine;
}
-void createLLVMInvocationFunc(const std::string& name, llvm::Module* module) {
- // TODO(ataei): This is written as a stub in LLVM IR. It would be easier to
- // have this using MLIR and lower it to LLVM like the dispatch function
- // implementation is.
-
- auto& ctx = module->getContext();
- llvm::IRBuilder<> builder(ctx);
- auto var_func = module->getFunction(name);
-
- auto new_type = llvm::FunctionType::get(
- builder.getVoidTy(), builder.getInt8PtrTy()->getPointerTo(),
- /*isVarArg=*/false);
-
- auto new_name = "invoke_" + name;
- auto func_cst = module->getOrInsertFunction(new_name, new_type);
- llvm::Function* interface_func =
- llvm::cast<llvm::Function>(func_cst.getCallee());
-
- auto bb = llvm::BasicBlock::Create(ctx);
- bb->insertInto(interface_func);
- builder.SetInsertPoint(bb);
- llvm::Value* argList = interface_func->arg_begin();
- llvm::SmallVector<llvm::Value*, 8> args;
- args.reserve(llvm::size(var_func->args()));
- for (auto& indexedArg : llvm::enumerate(var_func->args())) {
- llvm::Value* arg_index = llvm::Constant::getIntegerValue(
- builder.getInt64Ty(), llvm::APInt(64, indexedArg.index()));
- llvm::Value* arg_ptr_ptr = builder.CreateGEP(argList, arg_index);
- llvm::Value* arg_ptr = builder.CreateLoad(arg_ptr_ptr);
- arg_ptr = builder.CreateBitCast(
- arg_ptr, indexedArg.value().getType()->getPointerTo());
- llvm::Value* arg = builder.CreateLoad(arg_ptr);
- args.push_back(arg);
- }
- builder.CreateCall(var_func, args);
- builder.CreateRetVoid();
-}
-
LogicalResult runLLVMIRPasses(const LLVMTargetOptions& options,
llvm::TargetMachine* machine,
llvm::Module* module) {
@@ -91,7 +53,8 @@
llvm::ModuleAnalysisManager moduleAnalysisManager;
llvm::PassInstrumentationCallbacks passInstrumentationCallbacks;
- llvm::StandardInstrumentations standardInstrumentations;
+ llvm::StandardInstrumentations standardInstrumentations(
+ /*DebugLogging=*/false);
standardInstrumentations.registerCallbacks(passInstrumentationCallbacks);
llvm::PassBuilder passBuilder(machine, options.pipelineTuningOptions, {},
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
index 199e36f..37ee1ba 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
@@ -31,9 +31,6 @@
std::unique_ptr<llvm::TargetMachine> createTargetMachine(
const LLVMTargetOptions& options);
-// Creates an invocation function in a module for the given function name.
-void createLLVMInvocationFunc(const std::string& name, llvm::Module* module);
-
// Creates and runs LLVMIR optimization passes defined in LLVMTargetOptions.
LogicalResult runLLVMIRPasses(const LLVMTargetOptions& options,
llvm::TargetMachine* machine,
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRTarget.cpp
index 98c0bf4..96bb5ac 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRTarget.cpp
@@ -58,13 +58,9 @@
auto executableOp = cast<IREE::HAL::ExecutableOp>(targetOp.getParentOp());
auto entryPointOps =
executableOp.getBlock().getOps<IREE::HAL::ExecutableEntryPointOp>();
- const bool addCInterface = true;
for (auto entryPointOp : entryPointOps) {
- std::string funcName =
- addCInterface ? "_mlir_ciface_" + std::string(entryPointOp.sym_name())
- : std::string(entryPointOp.sym_name());
- llvmIrExecutableDef.entry_points.push_back(funcName);
- createLLVMInvocationFunc(funcName, llvmModule.get());
+ llvmIrExecutableDef.entry_points.push_back(
+ std::string(entryPointOp.sym_name()));
}
// LLVMIR opt passes.
@@ -74,8 +70,9 @@
options_.targetTriple);
return failure();
}
- if (failed(
- runLLVMIRPasses(options_, targetMachine.get(), llvmModule.get()))) {
+ LogicalResult translationResult =
+ runLLVMIRPasses(options_, targetMachine.get(), llvmModule.get());
+ if (failed(translationResult)) {
return targetOp.emitError(
"Can't build LLVMIR opt passes for ExecutableOp module");
}
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
index c3ba845..19c3372 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
@@ -24,5 +24,6 @@
deps = [
"//iree/base:status",
"//iree/compiler/Dialect/HAL/Target/LLVM:LLVMAOTTargetLinker_hdrs",
+ "@llvm-project//llvm:Support",
],
)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
index 18c9c7a..b91dae5 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
@@ -20,6 +20,7 @@
SRCS
"LLVMAOTTargetLinker.cpp"
DEPS
+ LLVMSupport
iree::base::status
iree::compiler::Dialect::HAL::Target::LLVM::LLVMAOTTargetLinker_hdrs
PUBLIC
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/LLVMAOTTargetLinker.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/LLVMAOTTargetLinker.cpp
index e5108d6..65d8e22 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/LLVMAOTTargetLinker.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/LLVMAOTTargetLinker.cpp
@@ -15,6 +15,7 @@
#include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTargetLinker.h"
#include "iree/base/status.h"
+#include "llvm/Support/ToolOutputFile.h"
namespace mlir {
namespace iree_compiler {
@@ -23,18 +24,47 @@
iree::StatusOr<std::string> linkLLVMAOTObjects(
const std::string& linkerToolPath, const std::string& objData) {
- std::string archiveFile, sharedLibFile;
- ASSIGN_OR_RETURN(archiveFile, iree::file_io::GetTempFile("objfile"));
- RETURN_IF_ERROR(iree::file_io::SetFileContents(archiveFile, objData));
- ASSIGN_OR_RETURN(sharedLibFile, iree::file_io::GetTempFile("dylibfile"));
- std::string linkingCmd =
- linkerToolPath + " -shared " + archiveFile + " -o " + sharedLibFile;
+ llvm::SmallString<32> objFilePath, dylibFilePath;
+ if (std::error_code error = llvm::sys::fs::createTemporaryFile(
+ "llvmaot_dylibs", "objfile", objFilePath)) {
+ return iree::InternalErrorBuilder(IREE_LOC)
+ << "Failed to generate temporary file for objfile : '"
+ << error.message() << "'";
+ }
+ if (std::error_code error = llvm::sys::fs::createTemporaryFile(
+ "llvmaot_dylibs", "dylibfile", dylibFilePath)) {
+ return iree::InternalErrorBuilder(IREE_LOC)
+ << "Failed to generate temporary file for dylib : '"
+ << error.message() << "'";
+ }
+ std::error_code error;
+ auto outputFile = std::make_unique<llvm::ToolOutputFile>(
+ objFilePath, error, llvm::sys::fs::F_None);
+ if (error) {
+ return iree::InternalErrorBuilder(IREE_LOC)
+ << "Failed to open temporary objfile '" << objFilePath.c_str()
+ << "' for dylib : '" << error.message() << "'";
+ }
+
+ outputFile->os() << objData;
+ outputFile->os().flush();
+
+ auto linkingCmd =
+ (linkerToolPath + " -shared " + objFilePath + " -o " + dylibFilePath)
+ .str();
int systemRet = system(linkingCmd.c_str());
if (systemRet != 0) {
return iree::InternalErrorBuilder(IREE_LOC)
<< linkingCmd << " failed with exit code " << systemRet;
}
- return iree::file_io::GetFileContents(sharedLibFile);
+
+ auto dylibData = llvm::MemoryBuffer::getFile(dylibFilePath);
+ if (!dylibData) {
+ return iree::InternalErrorBuilder(IREE_LOC)
+ << "Failed to read temporary dylib file '" << dylibFilePath.c_str()
+ << "'";
+ }
+ return dylibData.get()->getBuffer().str();
}
iree::StatusOr<std::string> linkLLVMAOTObjectsWithLLDElf(
diff --git a/iree/compiler/Dialect/HAL/hal.imports.mlir b/iree/compiler/Dialect/HAL/hal.imports.mlir
index 224bb7b..f3d8c62 100644
--- a/iree/compiler/Dialect/HAL/hal.imports.mlir
+++ b/iree/compiler/Dialect/HAL/hal.imports.mlir
@@ -214,6 +214,11 @@
) -> (i32, i32, i32, i32)
attributes {nosideeffects}
+// Prints out the content of buffers.
+vm.import @buffer_view.trace(
+ %operands : !vm.ref<!hal.buffer_view> ...
+)
+
//===----------------------------------------------------------------------===//
// iree::hal::CommandBuffer
//===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir b/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
index 9486f96..39c18ce 100644
--- a/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
+++ b/iree/compiler/Dialect/IREE/Transforms/test/drop_compiler_hints.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt -split-input-file -iree-drop-compiler-hints %s | IreeFileCheck --implicit-check-not="iree.do_not_optimize" %s
-// This file is used as an example in docs/developer_overview.md.
+// This file is used as an example in docs/developing_iree/developer_overview.md.
// If you move or delete it, please update the documentation accordingly.
// CHECK-LABEL: @constant
diff --git a/iree/compiler/Dialect/Shape/Conversion/ConvertShapeToShapex.cpp b/iree/compiler/Dialect/Shape/Conversion/ConvertShapeToShapex.cpp
index 1f2a0e6..decf426 100644
--- a/iree/compiler/Dialect/Shape/Conversion/ConvertShapeToShapex.cpp
+++ b/iree/compiler/Dialect/Shape/Conversion/ConvertShapeToShapex.cpp
@@ -100,8 +100,28 @@
}
auto resultType =
RankedShapeType::get(tensorType.getShape(), rewriter.getContext());
- rewriter.replaceOpWithNewOp<Shape::GetRankedShapeOp>(op, resultType,
- operands[0]);
+ // TODO(jpienaar): The following needs to be re-evaluated once the patch
+ // train from 2020/07/23 integrates properly. This is required to make
+ // it forward and backwards compatible. Also, tests need to be added once
+ // upstream integrates (and this can be tested).
+ // rewriter.replaceOpWithNewOp<Shape::GetRankedShapeOp>(op, resultType,
+ // operands[0]);
+ auto getRanked = rewriter.create<Shape::GetRankedShapeOp>(
+ op.getLoc(), resultType, operands[0]);
+
+ // For FromExtentTensorOp users, just forward the result from GetRanked.
+ SmallPtrSet<Operation *, 2> toDelete;
+ for (auto use : op.getOperation()->getUsers()) {
+ if (isa<FromExtentTensorOp>(use)) {
+ use->replaceAllUsesWith(getRanked);
+ toDelete.insert(use);
+ }
+ }
+ for (Operation *use : toDelete) {
+ rewriter.eraseOp(use);
+ }
+
+ rewriter.replaceOp(op.getOperation(), getRanked.getResult());
return success();
}
};
diff --git a/iree/compiler/Dialect/Shape/Conversion/test/shape_to_shapex.mlir b/iree/compiler/Dialect/Shape/Conversion/test/shape_to_shapex.mlir
index 5017c18..ea613c9 100644
--- a/iree/compiler/Dialect/Shape/Conversion/test/shape_to_shapex.mlir
+++ b/iree/compiler/Dialect/Shape/Conversion/test/shape_to_shapex.mlir
@@ -5,8 +5,8 @@
// CHECK-LABEL: func @f
func @f(%arg0: tensor<?xf32>) {
// CHECK: shapex.const_ranked_shape : !shapex.ranked_shape<[1,2,3]>
- %0 = shape.const_shape [1, 2, 3]
- "foo.use"(%0) : (!shape.shape) -> ()
+ %0 = shape.const_shape [1, 2, 3] : tensor<?xindex>
+ "foo.use"(%0) : (tensor<?xindex>) -> ()
return
}
diff --git a/iree/compiler/Dialect/Shape/IR/ShapeInterface.h b/iree/compiler/Dialect/Shape/IR/ShapeInterface.h
index a200bfd..b36890e 100644
--- a/iree/compiler/Dialect/Shape/IR/ShapeInterface.h
+++ b/iree/compiler/Dialect/Shape/IR/ShapeInterface.h
@@ -58,7 +58,10 @@
}
template <typename BuilderTy, typename... ConstructorArgs>
- BuilderTy &make(ConstructorArgs &&... args) {
+ // TODO(suderman): Re-enable clang-format when new version migrates.
+ // clang-format off
+ BuilderTy &make(ConstructorArgs &&...args) {
+ // clang-format on
auto instance =
std::make_unique<BuilderTy>(std::forward<ConstructorArgs>(args)...);
BuilderTy *unowned = instance.get();
diff --git a/iree/compiler/Dialect/Shape/IR/ShapeOps.cpp b/iree/compiler/Dialect/Shape/IR/ShapeOps.cpp
index b81c600..de5a06f 100644
--- a/iree/compiler/Dialect/Shape/IR/ShapeOps.cpp
+++ b/iree/compiler/Dialect/Shape/IR/ShapeOps.cpp
@@ -321,7 +321,9 @@
SmallVectorImpl<Type> &inferredReturnTypes) {
auto inputType = operands[0].getType().dyn_cast<RankedTensorType>();
if (!inputType || !isValidTensorOfExtents(inputType)) {
- return failure();
+ return emitOptionalError(location, "Invalid input type, ",
+ operands[0].getType(),
+ ", for from_extent_tensor op");
}
SmallVector<int64_t, 6> extents(inputType.getDimSize(0),
static_cast<int64_t>(-1));
diff --git a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp
index 8d37751..9024e88 100644
--- a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp
+++ b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp
@@ -308,6 +308,130 @@
return builder.create<MakeRankedShapeOp>(loc, resultShape, dynamicDims);
}
+Value rewriteTorchIndexSelect(RankedShapeType resultShape,
+ TorchIndexSelectOp torchIndexSelectOp,
+ OpBuilder &builder) {
+ if (!torchIndexSelectOp) return nullptr;
+ auto loc = torchIndexSelectOp.getLoc();
+
+ int64_t resultShapeRank = resultShape.getRank();
+ auto paramsType =
+ torchIndexSelectOp.input().getType().dyn_cast<RankedTensorType>();
+ auto indicesType =
+ torchIndexSelectOp.index().getType().dyn_cast<RankedTensorType>();
+ if (!paramsType || !indicesType) {
+ return nullptr;
+ }
+
+ auto axis = torchIndexSelectOp.dim();
+ auto batchDim = torchIndexSelectOp.batch_dims();
+ int64_t paramsRank = paramsType.getRank();
+ int64_t indicesRank = indicesType.getRank();
+
+ std::vector<int64_t> shape(paramsType.getShape());
+ int64_t axisValue = axis.getSExtValue();
+ int64_t batchDimValue = batchDim.getSExtValue();
+
+ // For neg axis values, we wrap around params,
+ // e.g. axis = -1 => params[:-1]
+ if (axisValue < 0) {
+ axisValue += paramsRank;
+ }
+ if (batchDimValue < 0) {
+ batchDimValue += indicesRank;
+ }
+
+ // params must be at least rank axis + 1
+ if (paramsRank < axisValue + 1) {
+ return nullptr;
+ }
+
+ auto paramsShapeValue = builder.create<GetRankedShapeOp>(
+ loc, RankedShapeType::get(paramsType.getShape(), builder.getContext()),
+ torchIndexSelectOp.input());
+ auto indicesShapeValue = builder.create<GetRankedShapeOp>(
+ loc, RankedShapeType::get(indicesType.getShape(), builder.getContext()),
+ torchIndexSelectOp.index());
+
+ SmallVector<Value, 4> dynamicDims;
+#define GENERATE_RANKED_DIM_OP(value, index) \
+ do { \
+ auto dimValue = builder.create<RankedDimOp>( \
+ loc, builder.getIndexType(), value, builder.getI64IntegerAttr(index)); \
+ dynamicDims.push_back(dimValue); \
+ } while (0)
+
+ if (indicesRank == 0) {
+ // Scalar indices (output is rank(params) - 1).
+ if (resultShapeRank != paramsRank - 1) {
+ return nullptr;
+ }
+
+ // params.shape[:axis] + params.shape[axis+1:]
+ for (int64_t i = 0; i < paramsRank; ++i) {
+ if ((i == axisValue) || (i < axisValue && !resultShape.isDimDynamic(i)) ||
+ (i > axisValue && !resultShape.isDimDynamic(i - 1)))
+ continue;
+ GENERATE_RANKED_DIM_OP(paramsShapeValue, i);
+ }
+ } else if (indicesRank == 1) {
+ // Vector indices (output is rank(params)).
+ // Copy indices.shape into params.shape[axis]
+ if (resultShapeRank != paramsRank) {
+ return nullptr;
+ }
+
+ // params.shape[:axis] + indices.shape[batch_dims:]
+ // + params.shape[indicesRank-batchDim+axisValue:]
+ int resultShapeIndex = 0;
+ // params.shape[:axis]
+ for (int64_t i = 0; i < axisValue; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(paramsShapeValue, i);
+ }
+ // indices.shape[:batchDim]
+ for (int64_t i = batchDimValue;
+ i < indicesRank && resultShapeIndex < resultShapeRank; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(indicesShapeValue, i);
+ }
+ // params.shape[indicesRank-batchDim+axisValue:]
+ // resultShapeIndex == indicesRank-batchDim+axisValue
+ for (int64_t i = resultShapeIndex; i < resultShapeRank; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(paramsShapeValue, i);
+ }
+ } else {
+ // params.shape[:axis] + indices.shape[batch_dims:] + params.shape[axis +
+ // 1:]
+ // The expected rank is (paramsRank-1) + (indicesRank-batchDim)
+ auto expectedRank = paramsRank - 1 + indicesRank - batchDimValue;
+ if (resultShapeRank != expectedRank) {
+ return nullptr;
+ }
+
+ int resultShapeIndex = 0;
+ for (int64_t i = 0; i < axisValue; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(paramsShapeValue, i);
+ }
+
+ for (int64_t i = batchDimValue; i < indicesRank; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(indicesShapeValue, i);
+ }
+
+ for (int64_t i = axisValue + 1;
+ i < paramsRank && resultShapeIndex < resultShapeRank; ++i) {
+ if (!resultShape.isDimDynamic(resultShapeIndex++)) continue;
+ GENERATE_RANKED_DIM_OP(paramsShapeValue, i);
+ }
+ }
+#undef GENERATE_RANKED_DIM_OP
+
+ return builder.create<MakeRankedShapeOp>(loc, resultShape, dynamicDims);
+}
+
} // namespace
// Creates a custom op shape builder for XLA-HLO ops that are not otherwise
@@ -340,6 +464,8 @@
b.insertOpRankedShapeBuilder<TransposeOp>(rewriteTranspose);
b.insertOpRankedShapeBuilder<mhlo::DotGeneralOp>(rewriteDotGeneral);
b.insertOpRankedShapeBuilder<mhlo::DynamicReshapeOp>(rewriteDynamicReshape);
+ b.insertOpRankedShapeBuilder<mhlo::TorchIndexSelectOp>(
+ rewriteTorchIndexSelect);
}
} // namespace mhlo
diff --git a/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp b/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
index 668442f..de6e351 100644
--- a/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
+++ b/iree/compiler/Dialect/VM/Conversion/TypeConverter.cpp
@@ -80,13 +80,12 @@
});
// TODO(b/145876978): materialize conversion for other types
- addMaterialization([](PatternRewriter &rewriter,
- Shape::RankedShapeType resultType, ValueRange inputs,
- Location loc) -> Optional<Value> {
+ addArgumentMaterialization([](OpBuilder &builder,
+ Shape::RankedShapeType resultType,
+ ValueRange inputs, Location loc) -> Value {
LLVM_DEBUG(llvm::dbgs()
<< "MATERIALIZE CONVERSION: " << resultType << "\n");
- return rewriter.create<Shape::MakeRankedShapeOp>(loc, resultType, inputs)
- .getResult();
+ return builder.create<Shape::MakeRankedShapeOp>(loc, resultType, inputs);
});
}
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
similarity index 64%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
index e4cc270..f4f29c9 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
@@ -1,5 +1,3 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+if(${IREE_ENABLE_EMITC})
+ iree_add_all_subdirs()
+
+ iree_cc_library(
+ NAME
+ VMToEmitC
+ HDRS
+ "ConvertVMToEmitC.h"
+ SRCS
+ "ConvertVMToEmitC.cpp"
+ DEPS
+ MLIRIR
+ MLIRPass
+ MLIREmitC
+ MLIRTransforms
+ iree::compiler::Dialect::VM::IR
+ PUBLIC
+ )
+endif()
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
new file mode 100644
index 0000000..1d6d96e
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
@@ -0,0 +1,108 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h"
+
+#include "emitc/Dialect/EmitC/EmitCDialect.h"
+#include "iree/compiler/Dialect/VM/IR/VMOps.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/Module.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace mlir {
+namespace iree_compiler {
+
+namespace {
+
+// Adapted from the StandardToVM conversion.
+// We need to replace the Op depending on the operand.
+// We could start with a conversion for IREE::VM::AddI32Op
+template <typename SrcOpTy, typename DstOpTy>
+class BinaryArithmeticOpConversion : public OpConversionPattern<SrcOpTy> {
+ using OpConversionPattern<SrcOpTy>::OpConversionPattern;
+
+ public:
+ BinaryArithmeticOpConversion(MLIRContext *context, StringRef funcName)
+ : OpConversionPattern<SrcOpTy>(context), funcName(funcName) {}
+
+ private:
+ LogicalResult matchAndRewrite(
+ SrcOpTy srcOp, ArrayRef<Value> operands,
+ ConversionPatternRewriter &rewriter) const override {
+ typename SrcOpTy::Adaptor srcAdapter(operands);
+
+ StringAttr callee = rewriter.getStringAttr(funcName);
+ ArrayAttr args =
+ rewriter.getArrayAttr({IntegerAttr::get(rewriter.getIndexType(), 0),
+ IntegerAttr::get(rewriter.getIndexType(), 1)});
+ ValueRange dstOperands{srcAdapter.lhs(), srcAdapter.rhs()};
+
+ rewriter.replaceOpWithNewOp<DstOpTy>(srcOp, srcAdapter.lhs().getType(),
+ callee, args, dstOperands);
+
+ return success();
+ }
+
+ StringRef funcName;
+};
+
+} // namespace
+
+void populateVMToCPatterns(MLIRContext *context,
+ OwningRewritePatternList &patterns) {
+ patterns.insert<
+ BinaryArithmeticOpConversion<IREE::VM::AddI32Op, mlir::emitc::CallOp>>(
+ context, "vm_add_i32");
+}
+
+namespace IREE {
+namespace VM {
+
+namespace {
+
+// A pass converting IREE VM operations into the EmitC dialect.
+class ConvertVMToEmitCPass
+ : public PassWrapper<ConvertVMToEmitCPass,
+ OperationPass<IREE::VM::ModuleOp>> {
+ void runOnOperation() {
+ ConversionTarget target(getContext());
+
+ OwningRewritePatternList patterns;
+ populateVMToCPatterns(&getContext(), patterns);
+
+ target.addLegalDialect<mlir::emitc::EmitCDialect>();
+ target.addLegalDialect<IREE::VM::VMDialect>();
+ target.addIllegalOp<IREE::VM::AddI32Op>();
+
+ if (failed(applyFullConversion(getOperation(), target, patterns))) {
+ return signalPassFailure();
+ }
+ }
+};
+
+} // namespace
+
+std::unique_ptr<OperationPass<IREE::VM::ModuleOp>>
+createConvertVMToEmitCPass() {
+ return std::make_unique<ConvertVMToEmitCPass>();
+}
+
+} // namespace VM
+} // namespace IREE
+
+static PassRegistration<IREE::VM::ConvertVMToEmitCPass> pass(
+ "iree-convert-vm-to-emitc", "Convert VM Ops to the EmitC dialect");
+
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
new file mode 100644
index 0000000..a1bc545
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
@@ -0,0 +1,38 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_CONVERTVMTOEMITC_H_
+#define IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_CONVERTVMTOEMITC_H_
+
+#include "iree/compiler/Dialect/VM/IR/VMOps.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace iree_compiler {
+
+void populateVMToCPatterns(MLIRContext *context,
+ OwningRewritePatternList &patterns);
+
+namespace IREE {
+namespace VM {
+
+std::unique_ptr<OperationPass<IREE::VM::ModuleOp>> createConvertVMToEmitCPass();
+
+} // namespace VM
+} // namespace IREE
+
+} // namespace iree_compiler
+} // namespace mlir
+
+#endif // IREE_COMPILER_DIALECT_VM_CONVERSION_VMTOEMITC_CONVERTVMTOEMITC_H_
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/CMakeLists.txt
similarity index 68%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/CMakeLists.txt
index e4cc270..fcc538b 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/CMakeLists.txt
@@ -1,5 +1,3 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+iree_add_all_subdirs()
+
+file(GLOB _GLOB_X_MLIR LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS *.mlir)
+iree_lit_test_suite(
+ NAME
+ lit
+ SRCS
+ "${_GLOB_X_MLIR}"
+ DATA
+ iree::tools::IreeFileCheck
+ iree::tools::iree-opt
+)
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/add.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/add.mlir
new file mode 100644
index 0000000..a693fe5
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/add.mlir
@@ -0,0 +1,12 @@
+// RUN: iree-opt -split-input-file -pass-pipeline='iree-convert-vm-to-emitc' %s | IreeFileCheck %s
+
+// CHECK: vm.module @add_module {
+vm.module @add_module {
+ // CHECK-NEXT: vm.func @add_i32(%arg0: i32, %arg1: i32) {
+ vm.func @add_i32(%arg0: i32, %arg1: i32) {
+ // CHECK-NEXT: %0 = emitc.call "vm_add_i32"(%arg0, %arg1) {args = [0 : index, 1 : index]} : (i32, i32) -> i32
+ %0 = vm.add.i32 %arg0, %arg1 : i32
+ // CHECK-NEXT: vm.return
+ vm.return
+ }
+}
diff --git a/iree/compiler/Dialect/VM/IR/VMBase.td b/iree/compiler/Dialect/VM/IR/VMBase.td
index 12fb1a5..9243cef 100644
--- a/iree/compiler/Dialect/VM/IR/VMBase.td
+++ b/iree/compiler/Dialect/VM/IR/VMBase.td
@@ -56,8 +56,8 @@
// VM opcodes
//===----------------------------------------------------------------------===//
// Opcode ranges:
-// 0x00-0x7F: core VM opcodes, reserved for this dialect
-// 0x80-0xFF: unreserved, used by target-specific ops (like SIMD)
+// 0x00-0x9F: core VM opcodes, reserved for this dialect
+// 0xA0-0xFF: unreserved, used to prefix extension op sets
//
// Note that changing existing opcode assignments will invalidate all binaries
// and should only be done when breaking changes are acceptable. We could add a
@@ -67,9 +67,28 @@
// Some opcodes require an extension prefix to indicate that runtime support
// is optional. An op with the ExtI64 trait will require VM_OPC_ExtI64, for
// example. Ops that bridge extension sets have a canonical form that may
-// require multiple prefix codes.
+// require multiple prefix codes (for example, the i64<->f64 extensions).
-class VM_OPC<int opcode, string name> : I32EnumAttrCase<name, opcode>;
+class VM_OPC<int opcode, string name> :
+ IntEnumAttrCaseBase<I8, name, name, opcode>;
+
+class VM_OPC_EnumAttr<string name, string enumName, string enumTag,
+ string description,
+ VM_OPC prefix = ?,
+ list<VM_OPC> cases> :
+ IntEnumAttr<I8, name, description, cases> {
+ let cppNamespace = "IREE::VM";
+ let returnType = cppNamespace # "::" # name;
+ let underlyingType = "uint8_t";
+ let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
+ let constBuilderCall =
+ "$_builder.getI8IntegerAttr(static_cast<int8_t>($0))";
+
+ // Used by VMOpTableGen:
+ string opcodeEnumName = enumName;
+ VM_OPC opcodePrefix = prefix;
+ string opcodeEnumTag = enumTag;
+}
// Globals:
def VM_OPC_GlobalLoadI32 : VM_OPC<0x00, "GlobalLoadI32">;
@@ -131,7 +150,9 @@
def VM_OPC_TruncI32I8 : VM_OPC<0x31, "TruncI32I8">;
def VM_OPC_TruncI32I16 : VM_OPC<0x32, "TruncI32I16">;
def VM_OPC_ExtI8I32S : VM_OPC<0x33, "ExtI8I32S">;
-def VM_OPC_ExtI16I32S : VM_OPC<0x34, "ExtI16I32S">;
+def VM_OPC_ExtI8I32U : VM_OPC<0x34, "ExtI8I32U">;
+def VM_OPC_ExtI16I32S : VM_OPC<0x35, "ExtI16I32S">;
+def VM_OPC_ExtI16I32U : VM_OPC<0x36, "ExtI16I32U">;
// Reduction arithmetic:
@@ -140,12 +161,6 @@
def VM_OPC_CmpNEI32 : VM_OPC<0x41, "CmpNEI32">;
def VM_OPC_CmpLTI32S : VM_OPC<0x42, "CmpLTI32S">;
def VM_OPC_CmpLTI32U : VM_OPC<0x43, "CmpLTI32U">;
-def VM_OPC_CmpLTEI32S : VM_OPC<0x44, "CmpLTEI32S">;
-def VM_OPC_CmpLTEI32U : VM_OPC<0x45, "CmpLTEI32U">;
-def VM_OPC_CmpGTI32S : VM_OPC<0x46, "CmpGTI32S">;
-def VM_OPC_CmpGTI32U : VM_OPC<0x47, "CmpGTI32U">;
-def VM_OPC_CmpGTEI32S : VM_OPC<0x48, "CmpGTEI32S">;
-def VM_OPC_CmpGTEI32U : VM_OPC<0x49, "CmpGTEI32U">;
def VM_OPC_CmpNZI32 : VM_OPC<0x4D, "CmpNZI32">;
def VM_OPC_CmpEQRef : VM_OPC<0x4A, "CmpEQRef">;
def VM_OPC_CmpNERef : VM_OPC<0x4B, "CmpNERef">;
@@ -169,9 +184,175 @@
def VM_OPC_Break : VM_OPC<0x7F, "Break">;
// Extension prefixes:
-def VM_OPC_ExtI64 : VM_OPC<0x80, "ExtI64">;
-def VM_OPC_ExtF32 : VM_OPC<0x81, "ExtF32">;
-def VM_OPC_ExtF64 : VM_OPC<0x82, "ExtF64">;
+def VM_OPC_PrefixExtI64 : VM_OPC<0xA0, "PrefixExtI64">;
+def VM_OPC_PrefixExtF32 : VM_OPC<0xA1, "PrefixExtF32">;
+def VM_OPC_PrefixExtF64 : VM_OPC<0xA2, "PrefixExtF64">;
+
+// Runtime enum iree_vm_core_op_t:
+def VM_CoreOpcodeAttr :
+ VM_OPC_EnumAttr<"Opcode",
+ "iree_vm_core_op_t",
+ "CORE", // IREE_VM_OP_CORE_*
+ "valid VM core operation encodings",
+ ?, [
+ // Core VM opcodes (0x00-0x9F):
+ VM_OPC_GlobalLoadI32,
+ VM_OPC_GlobalStoreI32,
+ VM_OPC_GlobalLoadIndirectI32,
+ VM_OPC_GlobalStoreIndirectI32,
+ VM_OPC_GlobalLoadRef,
+ VM_OPC_GlobalStoreRef,
+ VM_OPC_GlobalLoadIndirectRef,
+ VM_OPC_GlobalStoreIndirectRef,
+ VM_OPC_ConstI32Zero,
+ VM_OPC_ConstI32,
+ VM_OPC_ConstRefZero,
+ VM_OPC_ConstRefRodata,
+ VM_OPC_ListAlloc,
+ VM_OPC_ListReserve,
+ VM_OPC_ListSize,
+ VM_OPC_ListResize,
+ VM_OPC_ListGetI32,
+ VM_OPC_ListSetI32,
+ VM_OPC_ListGetRef,
+ VM_OPC_ListSetRef,
+ VM_OPC_SelectI32,
+ VM_OPC_SelectRef,
+ VM_OPC_SwitchI32,
+ VM_OPC_SwitchRef,
+ VM_OPC_AddI32,
+ VM_OPC_SubI32,
+ VM_OPC_MulI32,
+ VM_OPC_DivI32S,
+ VM_OPC_DivI32U,
+ VM_OPC_RemI32S,
+ VM_OPC_RemI32U,
+ VM_OPC_NotI32,
+ VM_OPC_AndI32,
+ VM_OPC_OrI32,
+ VM_OPC_XorI32,
+ VM_OPC_ShlI32,
+ VM_OPC_ShrI32S,
+ VM_OPC_ShrI32U,
+ VM_OPC_TruncI32I8,
+ VM_OPC_TruncI32I16,
+ VM_OPC_ExtI8I32S,
+ VM_OPC_ExtI8I32U,
+ VM_OPC_ExtI16I32S,
+ VM_OPC_ExtI16I32U,
+ VM_OPC_CmpEQI32,
+ VM_OPC_CmpNEI32,
+ VM_OPC_CmpLTI32S,
+ VM_OPC_CmpLTI32U,
+ VM_OPC_CmpNZI32,
+ VM_OPC_CmpEQRef,
+ VM_OPC_CmpNERef,
+ VM_OPC_CmpNZRef,
+ VM_OPC_Branch,
+ VM_OPC_CondBranch,
+ VM_OPC_Call,
+ VM_OPC_CallVariadic,
+ VM_OPC_Return,
+ VM_OPC_Fail,
+ VM_OPC_Yield,
+ VM_OPC_Trace,
+ VM_OPC_Print,
+ VM_OPC_CondBreak,
+ VM_OPC_Break,
+
+ // Extension opcodes (0xA0-0xFF):
+ VM_OPC_PrefixExtI64, // VM_ExtI64OpcodeAttr
+ VM_OPC_PrefixExtF32, // VM_ExtF32OpcodeAttr
+ VM_OPC_PrefixExtF64, // VM_ExtF64OpcodeAttr
+ ]>;
+
+// i64 extension:
+// (ops are encoded as a VM_OPC_PrefixExtI64 prefix + the opcode below)
+def VM_OPC_GlobalLoadI64 : VM_OPC<0x00, "GlobalLoadI64">;
+def VM_OPC_GlobalStoreI64 : VM_OPC<0x01, "GlobalStoreI64">;
+def VM_OPC_GlobalLoadIndirectI64 : VM_OPC<0x02, "GlobalLoadIndirectI64">;
+def VM_OPC_GlobalStoreIndirectI64: VM_OPC<0x03, "GlobalStoreIndirectI64">;
+def VM_OPC_ConstI64Zero : VM_OPC<0x08, "ConstI64Zero">;
+def VM_OPC_ConstI64 : VM_OPC<0x09, "ConstI64">;
+def VM_OPC_ListGetI64 : VM_OPC<0x14, "ListGetI64">;
+def VM_OPC_ListSetI64 : VM_OPC<0x15, "ListSetI64">;
+def VM_OPC_SelectI64 : VM_OPC<0x1E, "SelectI64">;
+def VM_OPC_SwitchI64 : VM_OPC<0x20, "SwitchI64">;
+def VM_OPC_AddI64 : VM_OPC<0x22, "AddI64">;
+def VM_OPC_SubI64 : VM_OPC<0x23, "SubI64">;
+def VM_OPC_MulI64 : VM_OPC<0x24, "MulI64">;
+def VM_OPC_DivI64S : VM_OPC<0x25, "DivI64S">;
+def VM_OPC_DivI64U : VM_OPC<0x26, "DivI64U">;
+def VM_OPC_RemI64S : VM_OPC<0x27, "RemI64S">;
+def VM_OPC_RemI64U : VM_OPC<0x28, "RemI64U">;
+def VM_OPC_NotI64 : VM_OPC<0x29, "NotI64">;
+def VM_OPC_AndI64 : VM_OPC<0x2A, "AndI64">;
+def VM_OPC_OrI64 : VM_OPC<0x2B, "OrI64">;
+def VM_OPC_XorI64 : VM_OPC<0x2C, "XorI64">;
+def VM_OPC_ShlI64 : VM_OPC<0x2D, "ShlI64">;
+def VM_OPC_ShrI64S : VM_OPC<0x2E, "ShrI64S">;
+def VM_OPC_ShrI64U : VM_OPC<0x2F, "ShrI64U">;
+def VM_OPC_TruncI64I8 : VM_OPC<0x30, "TruncI64I8">;
+def VM_OPC_TruncI64I16 : VM_OPC<0x31, "TruncI64I16">;
+def VM_OPC_TruncI64I32 : VM_OPC<0x32, "TruncI64I32">;
+def VM_OPC_ExtI8I64S : VM_OPC<0x33, "ExtI8I64S">;
+def VM_OPC_ExtI8I64U : VM_OPC<0x34, "ExtI8I64U">;
+def VM_OPC_ExtI16I64S : VM_OPC<0x35, "ExtI16I64S">;
+def VM_OPC_ExtI16I64U : VM_OPC<0x36, "ExtI16I64U">;
+def VM_OPC_ExtI32I64S : VM_OPC<0x37, "ExtI32I64S">;
+def VM_OPC_ExtI32I64U : VM_OPC<0x38, "ExtI32I64U">;
+def VM_OPC_CmpEQI64 : VM_OPC<0x40, "CmpEQI64">;
+def VM_OPC_CmpNEI64 : VM_OPC<0x41, "CmpNEI64">;
+def VM_OPC_CmpLTI64S : VM_OPC<0x42, "CmpLTI64S">;
+def VM_OPC_CmpLTI64U : VM_OPC<0x43, "CmpLTI64U">;
+def VM_OPC_CmpNZI64 : VM_OPC<0x4D, "CmpNZI64">;
+
+// Runtime enum iree_vm_ext_i64_op_t:
+def VM_ExtI64OpcodeAttr :
+ VM_OPC_EnumAttr<"ExtI64Opcode",
+ "iree_vm_ext_i64_op_t",
+ "EXT_I64", // IREE_VM_OP_EXT_I64_*
+ "valid VM operation encodings in the i64 extension",
+ VM_OPC_PrefixExtI64, [
+ VM_OPC_GlobalLoadI64,
+ VM_OPC_GlobalStoreI64,
+ VM_OPC_GlobalLoadIndirectI64,
+ VM_OPC_GlobalStoreIndirectI64,
+ VM_OPC_ConstI64Zero,
+ VM_OPC_ConstI64,
+ VM_OPC_ListGetI64,
+ VM_OPC_ListSetI64,
+ VM_OPC_SelectI64,
+ VM_OPC_SwitchI64,
+ VM_OPC_AddI64,
+ VM_OPC_SubI64,
+ VM_OPC_MulI64,
+ VM_OPC_DivI64S,
+ VM_OPC_DivI64U,
+ VM_OPC_RemI64S,
+ VM_OPC_RemI64U,
+ VM_OPC_NotI64,
+ VM_OPC_AndI64,
+ VM_OPC_OrI64,
+ VM_OPC_XorI64,
+ VM_OPC_ShlI64,
+ VM_OPC_ShrI64S,
+ VM_OPC_ShrI64U,
+ VM_OPC_TruncI64I8,
+ VM_OPC_TruncI64I16,
+ VM_OPC_TruncI64I32,
+ VM_OPC_ExtI8I64S,
+ VM_OPC_ExtI8I64U,
+ VM_OPC_ExtI16I64S,
+ VM_OPC_ExtI16I64U,
+ VM_OPC_ExtI32I64S,
+ VM_OPC_ExtI32I64U,
+ VM_OPC_CmpEQI64,
+ VM_OPC_CmpNEI64,
+ VM_OPC_CmpLTI64S,
+ VM_OPC_CmpLTI64U,
+ VM_OPC_CmpNZI64,
+ ]>;
//===----------------------------------------------------------------------===//
// Declarative encoding framework
diff --git a/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp b/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
index 521ef06..b3ceb17 100644
--- a/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
+++ b/iree/compiler/Dialect/VM/IR/VMOpFolders.cpp
@@ -605,11 +605,21 @@
operands, [&](APInt a) { return a.trunc(8).sext(32); });
}
+OpFoldResult ExtI8I32UOp::fold(ArrayRef<Attribute> operands) {
+ return constFoldUnaryOp<IntegerAttr>(
+ operands, [&](APInt a) { return a.trunc(8).zext(32); });
+}
+
OpFoldResult ExtI16I32SOp::fold(ArrayRef<Attribute> operands) {
return constFoldUnaryOp<IntegerAttr>(
operands, [&](APInt a) { return a.trunc(16).sext(32); });
}
+OpFoldResult ExtI16I32UOp::fold(ArrayRef<Attribute> operands) {
+ return constFoldUnaryOp<IntegerAttr>(
+ operands, [&](APInt a) { return a.trunc(16).zext(32); });
+}
+
//===----------------------------------------------------------------------===//
// Native reduction (horizontal) arithmetic
//===----------------------------------------------------------------------===//
@@ -706,9 +716,7 @@
}
void CmpLTI32SOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
- MLIRContext *context) {
- results.insert<SwapInvertedCmpOps<CmpLTI32SOp, CmpGTEI32SOp>>(context);
-}
+ MLIRContext *context) {}
OpFoldResult CmpLTI32UOp::fold(ArrayRef<Attribute> operands) {
if (lhs() == rhs()) {
@@ -720,9 +728,27 @@
}
void CmpLTI32UOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
- MLIRContext *context) {
- results.insert<SwapInvertedCmpOps<CmpLTI32UOp, CmpGTEI32UOp>>(context);
-}
+ MLIRContext *context) {}
+
+namespace {
+
+/// Rewrites a vm.cmp.lte.* pseudo op to a vm.cmp.lt.* op.
+template <typename T, typename U>
+struct RewritePseudoCmpLTEToLT : public OpRewritePattern<T> {
+ using OpRewritePattern<T>::OpRewritePattern;
+ LogicalResult matchAndRewrite(T op,
+ PatternRewriter &rewriter) const override {
+ // !(lhs > rhs)
+ auto condValue =
+ rewriter.createOrFold<U>(op.getLoc(), op.getType(), op.rhs(), op.lhs());
+ rewriter.replaceOpWithNewOp<XorI32Op>(
+ op, op.getType(), condValue,
+ rewriter.createOrFold<IREE::VM::ConstI32Op>(op.getLoc(), 1));
+ return success();
+ }
+};
+
+} // namespace
OpFoldResult CmpLTEI32SOp::fold(ArrayRef<Attribute> operands) {
if (lhs() == rhs()) {
@@ -736,6 +762,7 @@
void CmpLTEI32SOp::getCanonicalizationPatterns(
OwningRewritePatternList &results, MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpLTEI32SOp, CmpGTI32SOp>>(context);
+ results.insert<RewritePseudoCmpLTEToLT<CmpLTEI32SOp, CmpLTI32SOp>>(context);
}
OpFoldResult CmpLTEI32UOp::fold(ArrayRef<Attribute> operands) {
@@ -750,8 +777,25 @@
void CmpLTEI32UOp::getCanonicalizationPatterns(
OwningRewritePatternList &results, MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpLTEI32UOp, CmpGTI32UOp>>(context);
+ results.insert<RewritePseudoCmpLTEToLT<CmpLTEI32UOp, CmpLTI32UOp>>(context);
}
+namespace {
+
+/// Rewrites a vm.cmp.gt.* pseudo op to a vm.cmp.lt.* op.
+template <typename T, typename U>
+struct RewritePseudoCmpGTToLT : public OpRewritePattern<T> {
+ using OpRewritePattern<T>::OpRewritePattern;
+ LogicalResult matchAndRewrite(T op,
+ PatternRewriter &rewriter) const override {
+ // rhs < lhs
+ rewriter.replaceOpWithNewOp<U>(op, op.getType(), op.rhs(), op.lhs());
+ return success();
+ }
+};
+
+} // namespace
+
OpFoldResult CmpGTI32SOp::fold(ArrayRef<Attribute> operands) {
if (lhs() == rhs()) {
// x > x = false
@@ -764,6 +808,7 @@
void CmpGTI32SOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpGTI32SOp, CmpLTEI32SOp>>(context);
+ results.insert<RewritePseudoCmpGTToLT<CmpGTI32SOp, CmpLTI32SOp>>(context);
}
OpFoldResult CmpGTI32UOp::fold(ArrayRef<Attribute> operands) {
@@ -778,8 +823,29 @@
void CmpGTI32UOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpGTI32UOp, CmpLTEI32UOp>>(context);
+ results.insert<RewritePseudoCmpGTToLT<CmpGTI32UOp, CmpLTI32UOp>>(context);
}
+namespace {
+
+/// Rewrites a vm.cmp.gte.* pseudo op to a vm.cmp.lt.* op.
+template <typename T, typename U>
+struct RewritePseudoCmpGTEToLT : public OpRewritePattern<T> {
+ using OpRewritePattern<T>::OpRewritePattern;
+ LogicalResult matchAndRewrite(T op,
+ PatternRewriter &rewriter) const override {
+ // !(lhs < rhs)
+ auto condValue =
+ rewriter.createOrFold<U>(op.getLoc(), op.getType(), op.lhs(), op.rhs());
+ rewriter.replaceOpWithNewOp<XorI32Op>(
+ op, op.getType(), condValue,
+ rewriter.createOrFold<IREE::VM::ConstI32Op>(op.getLoc(), 1));
+ return success();
+ }
+};
+
+} // namespace
+
OpFoldResult CmpGTEI32SOp::fold(ArrayRef<Attribute> operands) {
if (lhs() == rhs()) {
// x >= x = true
@@ -792,6 +858,7 @@
void CmpGTEI32SOp::getCanonicalizationPatterns(
OwningRewritePatternList &results, MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpGTEI32SOp, CmpLTI32SOp>>(context);
+ results.insert<RewritePseudoCmpGTEToLT<CmpGTEI32SOp, CmpLTI32SOp>>(context);
}
OpFoldResult CmpGTEI32UOp::fold(ArrayRef<Attribute> operands) {
@@ -806,6 +873,7 @@
void CmpGTEI32UOp::getCanonicalizationPatterns(
OwningRewritePatternList &results, MLIRContext *context) {
results.insert<SwapInvertedCmpOps<CmpGTEI32UOp, CmpLTI32UOp>>(context);
+ results.insert<RewritePseudoCmpGTEToLT<CmpGTEI32UOp, CmpLTI32UOp>>(context);
}
OpFoldResult CmpNZI32Op::fold(ArrayRef<Attribute> operands) {
diff --git a/iree/compiler/Dialect/VM/IR/VMOps.td b/iree/compiler/Dialect/VM/IR/VMOps.td
index 7c8f33c..b3a7a4e 100644
--- a/iree/compiler/Dialect/VM/IR/VMOps.td
+++ b/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -1294,11 +1294,21 @@
let hasFolder = 1;
}
+def VM_ExtI8I32UOp : VM_UnaryArithmeticOp<I32, "ext.i8.i32.u", VM_OPC_ExtI8I32U> {
+ let summary = [{integer zero extend 8 bits to 32 bits}];
+ let hasFolder = 1;
+}
+
def VM_ExtI16I32SOp : VM_UnaryArithmeticOp<I32, "ext.i16.i32.s", VM_OPC_ExtI16I32S> {
let summary = [{integer sign extend 16 bits to 32 bits}];
let hasFolder = 1;
}
+def VM_ExtI16I32UOp : VM_UnaryArithmeticOp<I32, "ext.i16.i32.u", VM_OPC_ExtI16I32U> {
+ let summary = [{integer zero extend 16 bits to 32 bits}];
+ let hasFolder = 1;
+}
+
//===----------------------------------------------------------------------===//
// Native reduction (horizontal) arithmetic
//===----------------------------------------------------------------------===//
@@ -1362,6 +1372,27 @@
];
}
+class VM_BinaryComparisonPseudoOp<Type type, string mnemonic,
+ list<OpTrait> traits = []> :
+ VM_PureOp<mnemonic, !listconcat(traits, [
+ AllTypesMatch<["lhs", "rhs"]>,
+ VM_PseudoOp,
+ ])> {
+ let description = [{
+ Compares two operands with the specified predicate.
+ }];
+
+ let arguments = (ins
+ type:$lhs,
+ type:$rhs
+ );
+ let results = (outs
+ I32:$result
+ );
+
+ let assemblyFormat = "operands attr-dict `:` type($lhs)";
+}
+
def VM_CmpEQI32Op :
VM_BinaryComparisonOp<I32, "cmp.eq.i32", VM_OPC_CmpEQI32, [Commutative]> {
let summary = [{integer equality comparison operation}];
@@ -1391,43 +1422,42 @@
}
def VM_CmpLTEI32SOp :
- VM_BinaryComparisonOp<I32, "cmp.lte.i32.s", VM_OPC_CmpLTEI32S> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.lte.i32.s"> {
let summary = [{signed integer less-than-or-equal comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
def VM_CmpLTEI32UOp :
- VM_BinaryComparisonOp<I32, "cmp.lte.i32.u", VM_OPC_CmpLTEI32U> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.lte.i32.u"> {
let summary = [{unsigned integer less-than-or-equal comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
-// TODO(benvanik): drop these and rely on lt/lte only?
def VM_CmpGTI32SOp :
- VM_BinaryComparisonOp<I32, "cmp.gt.i32.s", VM_OPC_CmpGTI32S> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.gt.i32.s"> {
let summary = [{signed integer greater-than comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
def VM_CmpGTI32UOp :
- VM_BinaryComparisonOp<I32, "cmp.gt.i32.u", VM_OPC_CmpGTI32U> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.gt.i32.u"> {
let summary = [{unsigned integer greater-than comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
def VM_CmpGTEI32SOp :
- VM_BinaryComparisonOp<I32, "cmp.gte.i32.s", VM_OPC_CmpGTEI32S> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.gte.i32.s"> {
let summary = [{signed integer greater-than-or-equal comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
def VM_CmpGTEI32UOp :
- VM_BinaryComparisonOp<I32, "cmp.gte.i32.u", VM_OPC_CmpGTEI32U> {
+ VM_BinaryComparisonPseudoOp<I32, "cmp.gte.i32.u"> {
let summary = [{unsigned integer greater-than-or-equal comparison operation}];
let hasCanonicalizer = 1;
let hasFolder = 1;
diff --git a/iree/compiler/Dialect/VM/IR/test/conversion_folding.mlir b/iree/compiler/Dialect/VM/IR/test/conversion_folding.mlir
index 319202d..02a9ea3 100644
--- a/iree/compiler/Dialect/VM/IR/test/conversion_folding.mlir
+++ b/iree/compiler/Dialect/VM/IR/test/conversion_folding.mlir
@@ -33,6 +33,14 @@
vm.return %0 : i32
}
+ // CHECK-LABEL: @ext_i8_i32_u_const
+ vm.func @ext_i8_i32_u_const() -> i32 {
+ // CHECK: vm.const.i32 255 : i32
+ %c = vm.const.i32 0x000000FF : i32
+ %0 = vm.ext.i8.i32.u %c : i32
+ vm.return %0 : i32
+ }
+
// CHECK-LABEL: @ext_i16_i32_s_const
vm.func @ext_i16_i32_s_const() -> i32 {
// CHECK: vm.const.i32 -1 : i32
@@ -40,4 +48,12 @@
%0 = vm.ext.i16.i32.s %c : i32
vm.return %0 : i32
}
+
+ // CHECK-LABEL: @ext_i16_i32_u_const
+ vm.func @ext_i16_i32_u_const() -> i32 {
+ // CHECK: vm.const.i32 65535 : i32
+ %c = vm.const.i32 0x0000FFFF : i32
+ %0 = vm.ext.i16.i32.u %c : i32
+ vm.return %0 : i32
+ }
}
diff --git a/iree/compiler/Dialect/VM/IR/test/conversion_ops.mlir b/iree/compiler/Dialect/VM/IR/test/conversion_ops.mlir
index c242930..008c5d6 100644
--- a/iree/compiler/Dialect/VM/IR/test/conversion_ops.mlir
+++ b/iree/compiler/Dialect/VM/IR/test/conversion_ops.mlir
@@ -20,8 +20,12 @@
vm.func @ext(%arg0 : i32) -> i32 {
// CHECK-NEXT: %0 = vm.ext.i8.i32.s %arg0 : i32
%0 = vm.ext.i8.i32.s %arg0 : i32
- // CHECK-NEXT: %1 = vm.ext.i16.i32.s %0 : i32
- %1 = vm.ext.i16.i32.s %0 : i32
- vm.return %1 : i32
+ // CHECK-NEXT: %1 = vm.ext.i8.i32.u %0 : i32
+ %1 = vm.ext.i8.i32.u %0 : i32
+ // CHECK-NEXT: %2 = vm.ext.i16.i32.s %1 : i32
+ %2 = vm.ext.i16.i32.s %1 : i32
+ // CHECK-NEXT: %3 = vm.ext.i16.i32.u %2 : i32
+ %3 = vm.ext.i16.i32.u %2 : i32
+ vm.return %3 : i32
}
}
diff --git a/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt b/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt
new file mode 100644
index 0000000..5183093
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+if(${IREE_ENABLE_EMITC})
+ iree_add_all_subdirs()
+
+ iree_cc_library(
+ NAME
+ C
+ HDRS
+ "CModuleTarget.h"
+ SRCS
+ "CModuleTarget.cpp"
+ "TranslationRegistration.cpp"
+ DEPS
+ LLVMSupport
+ MLIRIR
+ MLIRPass
+ MLIRSupport
+ iree::compiler::Dialect::VM::IR
+ iree::compiler::Dialect::VM::Conversion::VMToEmitC
+ PUBLIC
+ )
+endif()
diff --git a/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
new file mode 100644
index 0000000..923be34
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
@@ -0,0 +1,54 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/compiler/Dialect/VM/Target/C/CModuleTarget.h"
+
+#include "iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h"
+#include "mlir/Pass/PassManager.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace VM {
+
+LogicalResult translateModuleToC(IREE::VM::ModuleOp moduleOp,
+ llvm::raw_ostream &output) {
+ // TODO: implement translation
+ output << "// c module stub\n";
+
+ return success();
+}
+
+LogicalResult translateModuleToC(mlir::ModuleOp outerModuleOp,
+ llvm::raw_ostream &output) {
+ PassManager pm(outerModuleOp.getContext());
+
+ pm.addPass(createConvertVMToEmitCPass());
+
+ if (failed(pm.run(outerModuleOp))) {
+ return failure();
+ }
+
+ auto moduleOps = outerModuleOp.getOps<IREE::VM::ModuleOp>();
+ if (moduleOps.empty()) {
+ return outerModuleOp.emitError()
+ << "outer module does not contain a vm.module op";
+ }
+ return translateModuleToC(*moduleOps.begin(), output);
+}
+
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h
new file mode 100644
index 0000000..1268b3a
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.h
@@ -0,0 +1,41 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_COMPILER_DIALECT_VM_TARGET_C_CMODULETARGET_H_
+#define IREE_COMPILER_DIALECT_VM_TARGET_C_CMODULETARGET_H_
+
+#include "iree/compiler/Dialect/VM/IR/VMOps.h"
+#include "llvm/Support/raw_ostream.h"
+#include "mlir/IR/Module.h"
+#include "mlir/Support/LogicalResult.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace VM {
+
+// Translates a vm.module to a C module.
+//
+// Exposed via the --iree-vm-ir-to-c-module translation.
+LogicalResult translateModuleToC(IREE::VM::ModuleOp moduleOp,
+ llvm::raw_ostream &output);
+LogicalResult translateModuleToC(mlir::ModuleOp outerModuleOp,
+ llvm::raw_ostream &output);
+
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
+
+#endif // IREE_COMPILER_DIALECT_VM_TARGET_C_CMODULETARGET_H_
diff --git a/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp b/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp
new file mode 100644
index 0000000..bdeea02
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Target/C/TranslationRegistration.cpp
@@ -0,0 +1,34 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/compiler/Dialect/VM/Target/C/CModuleTarget.h"
+#include "mlir/Translation.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace VM {
+
+void registerToCTranslation() {
+ TranslateFromMLIRRegistration toCModule(
+ "iree-vm-ir-to-c-module",
+ [](mlir::ModuleOp moduleOp, llvm::raw_ostream &output) {
+ return translateModuleToC(moduleOp, output);
+ });
+}
+
+} // namespace VM
+} // namespace IREE
+} // namespace iree_compiler
+} // namespace mlir
diff --git a/kokoro/gcp_ubuntu/cmake/continuous.cfg b/iree/compiler/Dialect/VM/Target/C/test/CMakeLists.txt
similarity index 67%
copy from kokoro/gcp_ubuntu/cmake/continuous.cfg
copy to iree/compiler/Dialect/VM/Target/C/test/CMakeLists.txt
index e4cc270..6495524 100644
--- a/kokoro/gcp_ubuntu/cmake/continuous.cfg
+++ b/iree/compiler/Dialect/VM/Target/C/test/CMakeLists.txt
@@ -1,5 +1,3 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Deliberately blank as everything necessary is configured in common files, but
-# file must still exist to match corresponding (Google internal) job
-# configurations that trigger the builds.
+iree_add_all_subdirs()
+
+file(GLOB _GLOB_X_MLIR LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS *.mlir)
+iree_lit_test_suite(
+ NAME
+ lit
+ SRCS
+ "${_GLOB_X_MLIR}"
+ DATA
+ iree::tools::IreeFileCheck
+ iree::tools::iree-translate
+)
diff --git a/iree/compiler/Dialect/VM/Target/C/test/empty_module.mlir b/iree/compiler/Dialect/VM/Target/C/test/empty_module.mlir
new file mode 100644
index 0000000..60ef77a
--- /dev/null
+++ b/iree/compiler/Dialect/VM/Target/C/test/empty_module.mlir
@@ -0,0 +1,5 @@
+// RUN: iree-translate -iree-vm-ir-to-c-module %s | IreeFileCheck %s
+
+// CHECK: // c module stub
+vm.module @empty_module {
+}
diff --git a/iree/compiler/Dialect/VM/Target/CMakeLists.txt b/iree/compiler/Dialect/VM/Target/CMakeLists.txt
index c2d6a1d..fb9d914 100644
--- a/iree/compiler/Dialect/VM/Target/CMakeLists.txt
+++ b/iree/compiler/Dialect/VM/Target/CMakeLists.txt
@@ -12,8 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# bazel_to_cmake: DO NOT EDIT (EmitC is only buildable with CMake)
+
iree_add_all_subdirs()
+if(IREE_ENABLE_EMITC)
+ set(IREE_VM_CONDITIONAL_TARGETS
+ iree::compiler::Dialect::VM::Target::C
+ )
+endif()
+
iree_cc_library(
NAME
init_targets
@@ -21,5 +29,6 @@
"init_targets.h"
DEPS
iree::compiler::Dialect::VM::Target::Bytecode
+ ${IREE_VM_CONDITIONAL_TARGETS}
PUBLIC
)
diff --git a/iree/compiler/Dialect/VM/Target/init_targets.h b/iree/compiler/Dialect/VM/Target/init_targets.h
index 5866a77..6099564 100644
--- a/iree/compiler/Dialect/VM/Target/init_targets.h
+++ b/iree/compiler/Dialect/VM/Target/init_targets.h
@@ -21,6 +21,9 @@
namespace IREE {
namespace VM {
void registerToVMBytecodeTranslation();
+#ifdef IREE_HAVE_EMITC_DIALECT
+void registerToCTranslation();
+#endif // IREE_HAVE_EMITC_DIALECT
} // namespace VM
} // namespace IREE
@@ -31,6 +34,10 @@
inline void registerVMTargets() {
static bool init_once = []() {
IREE::VM::registerToVMBytecodeTranslation();
+#ifdef IREE_HAVE_EMITC_DIALECT
+ IREE::VM::registerToCTranslation();
+#endif // IREE_HAVE_EMITC_DIALECT
+
return true;
}();
(void)init_once;
diff --git a/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp b/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
index de17975..b3081c1 100644
--- a/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
+++ b/iree/compiler/Dialect/VM/Tools/VMOpTableGen.cpp
@@ -29,47 +29,33 @@
using ::llvm::formatv;
using ::llvm::Record;
-// Finds all serializable ops and emits a enum and template table for their
-// opcode and name.
-bool emitOpTableDefs(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) {
- llvm::emitSourceFileHeader("IREE VM Operation Tables", os);
-
- std::vector<const Record *> opRecords(256);
+void emitOpTable(const llvm::RecordKeeper &recordKeeper, const Record &tableDef,
+ raw_ostream &os) {
std::vector<const Record *> opEncodings(256);
- auto defs = recordKeeper.getAllDerivedDefinitions("VM_Op");
- for (const auto *def : defs) {
- if (def->isValueUnset("encoding")) continue;
- auto encodingExprs = def->getValueAsListOfDefs("encoding");
- for (auto encodingExpr : encodingExprs) {
- if (encodingExpr->getType()->getAsString() == "VM_EncOpcode") {
- auto *opcode = encodingExpr->getValueAsDef("opcode");
- opRecords[opcode->getValueAsInt("value")] = def;
- opEncodings[opcode->getValueAsInt("value")] = opcode;
- break;
- }
- }
+ for (auto *opcodeDef : tableDef.getValueAsListOfDefs("enumerants")) {
+ opEncodings[opcodeDef->getValueAsInt("value")] = opcodeDef;
}
os << "typedef enum {\n";
for (int i = 0; i < 256; ++i) {
- auto *def = opRecords[i];
- if (def) {
- auto *opcode = opEncodings[i];
- os << formatv(" IREE_VM_OP_{0} = {1}",
+ if (auto *opcode = opEncodings[i]) {
+ os << formatv(" IREE_VM_OP_{0}_{1} = {2}",
+ tableDef.getValueAsString("opcodeEnumTag"),
opcode->getValueAsString("symbol"), format_hex(i, 4, true));
} else {
- os << formatv(" IREE_VM_OP_RSV_{0}", format_hex(i, 4, true));
+ os << formatv(" IREE_VM_OP_{0}_RSV_{1}",
+ tableDef.getValueAsString("opcodeEnumTag"),
+ format_hex(i, 4, true));
}
os << ",\n";
}
- os << "} iree_vm_op_t;\n";
+ os << "} " << tableDef.getValueAsString("opcodeEnumName") << ";\n";
os << "\n";
- os << "#define IREE_VM_OP_TABLE(OPC, RSV) \\\n";
+ os << formatv("#define IREE_VM_OP_{0}_TABLE(OPC, RSV) \\\n",
+ tableDef.getValueAsString("opcodeEnumTag"));
for (int i = 0; i < 256; ++i) {
- auto *def = opRecords[i];
- if (def) {
- auto *opcode = opEncodings[i];
+ if (auto *opcode = opEncodings[i]) {
os << formatv(" OPC({0}, {1})", format_hex(i, 4, true),
opcode->getValueAsString("symbol"));
} else {
@@ -80,6 +66,17 @@
}
}
os << "\n\n";
+}
+
+// Finds all opcode tables in VMBase.td and emits an enum and template table for
+// their opcode and name.
+bool emitOpTableDefs(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) {
+ llvm::emitSourceFileHeader("IREE VM Operation Tables", os);
+
+ auto defs = recordKeeper.getAllDerivedDefinitions("VM_OPC_EnumAttr");
+ for (const auto *def : defs) {
+ emitOpTable(recordKeeper, *def, os);
+ }
return false;
}
diff --git a/iree/compiler/Translation/CMakeLists.txt b/iree/compiler/Translation/CMakeLists.txt
index dbe2324..e89e46f 100644
--- a/iree/compiler/Translation/CMakeLists.txt
+++ b/iree/compiler/Translation/CMakeLists.txt
@@ -12,8 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# bazel_to_cmake: DO NOT EDIT (EmitC is only buildable with CMake)
+
iree_add_all_subdirs()
+if(IREE_ENABLE_EMITC)
+ set(IREE_VM_CONDITIONAL_TARGETS
+ iree::compiler::Dialect::VM::Target::C
+ )
+endif()
+
iree_cc_library(
NAME
IREEVM
@@ -36,5 +44,6 @@
iree::compiler::Dialect::VM::Conversion::StandardToVM
iree::compiler::Dialect::VM::Target::Bytecode
iree::compiler::Dialect::VM::Transforms
+ ${IREE_VM_CONDITIONAL_TARGETS}
PUBLIC
)
diff --git a/iree/compiler/Translation/IREEVM.cpp b/iree/compiler/Translation/IREEVM.cpp
index 8708592..14284eb 100644
--- a/iree/compiler/Translation/IREEVM.cpp
+++ b/iree/compiler/Translation/IREEVM.cpp
@@ -23,6 +23,10 @@
#include "mlir/Pass/PassManager.h"
#include "mlir/Translation.h"
+#ifdef IREE_HAVE_EMITC_DIALECT
+#include "iree/compiler/Dialect/VM/Target/C/CModuleTarget.h"
+#endif // IREE_HAVE_EMITC_DIALECT
+
namespace mlir {
namespace iree_compiler {
@@ -73,10 +77,8 @@
});
}
-LogicalResult translateFromMLIRToVMBytecodeModule(
- ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions,
- IREE::VM::BytecodeTargetOptions bytecodeOptions,
- llvm::raw_ostream &output) {
+static LogicalResult translateFromMLIRToVM(
+ ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions) {
// Convert from our source to a vm.module in canonical form.
// After this completes we have a non-bytecode-specific vm.module that we
// could lower to other forms (LLVM IR, C, etc).
@@ -86,9 +88,22 @@
IREE::HAL::buildHALTransformPassPipeline(passManager, executableOptions);
IREE::VM::buildVMTransformPassPipeline(passManager);
passManager.addPass(mlir::iree_compiler::IREE::createDropCompilerHintsPass());
+
if (failed(passManager.run(moduleOp))) {
return moduleOp.emitError() << "conversion from source -> vm failed";
}
+ return success();
+}
+
+LogicalResult translateFromMLIRToVMBytecodeModule(
+ ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions,
+ IREE::VM::BytecodeTargetOptions bytecodeOptions,
+ llvm::raw_ostream &output) {
+ auto result = translateFromMLIRToVM(moduleOp, executableOptions);
+
+ if (failed(result)) {
+ return result;
+ }
// Serialize to bytecode.
return translateModuleToBytecode(moduleOp, bytecodeOptions, output);
@@ -103,10 +118,37 @@
bytecodeTargetOptions, output);
}
+#ifdef IREE_HAVE_EMITC_DIALECT
+LogicalResult translateFromMLIRToVMCModule(
+ ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions,
+ llvm::raw_ostream &output) {
+ auto result = translateFromMLIRToVM(moduleOp, executableOptions);
+
+ if (failed(result)) {
+ return result;
+ }
+
+ // Serialize to c code.
+ return mlir::iree_compiler::IREE::VM::translateModuleToC(moduleOp, output);
+}
+
+static LogicalResult translateFromMLIRToVMCModuleWithFlags(
+ ModuleOp moduleOp, llvm::raw_ostream &output) {
+ mlir::registerPassManagerCLOptions();
+ auto TargetOptions = IREE::HAL::getTargetOptionsFromFlags();
+ return translateFromMLIRToVMCModule(moduleOp, TargetOptions, output);
+}
+#endif // IREE_HAVE_EMITC_DIALECT
+
void registerIREEVMTranslation() {
TranslateFromMLIRRegistration toVMBytecodeModuleWithFlags(
"iree-mlir-to-vm-bytecode-module",
translateFromMLIRToVMBytecodeModuleWithFlags);
+
+#ifdef IREE_HAVE_EMITC_DIALECT
+ TranslateFromMLIRRegistration toVMCModuleWithFlags(
+ "iree-mlir-to-vm-c-module", translateFromMLIRToVMCModuleWithFlags);
+#endif // IREE_HAVE_EMITC_DIALECT
}
} // namespace iree_compiler
diff --git a/iree/compiler/Translation/IREEVM.h b/iree/compiler/Translation/IREEVM.h
index 9b95a31..ed9f36a 100644
--- a/iree/compiler/Translation/IREEVM.h
+++ b/iree/compiler/Translation/IREEVM.h
@@ -53,6 +53,16 @@
ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions,
IREE::VM::BytecodeTargetOptions bytecodeOptions, llvm::raw_ostream &output);
+#ifdef IREE_HAVE_EMITC_DIALECT
+// Translates an MLIR module containing a set of supported IREE input dialects
+// to an IREE VM C module.
+//
+// Exposed via the --iree-mlir-to-vm-c-module translation.
+LogicalResult translateFromMLIRToVMCModule(
+ ModuleOp moduleOp, IREE::HAL::TargetOptions executableOptions,
+ llvm::raw_ostream &output);
+#endif // IREE_HAVE_EMITC_DIALECT
+
// TODO(benvanik): versions with multiple targets, etc.
void registerIREEVMTransformPassPipeline();
diff --git a/iree/hal/dylib/BUILD b/iree/hal/dylib/BUILD
index 25c08ea..fc3ccb9 100644
--- a/iree/hal/dylib/BUILD
+++ b/iree/hal/dylib/BUILD
@@ -60,7 +60,6 @@
srcs = ["dylib_executable.cc"],
hdrs = ["dylib_executable.h"],
deps = [
- ":memref_runtime",
"//iree/base:dynamic_library",
"//iree/base:file_io",
"//iree/base:status",
@@ -89,10 +88,3 @@
"//iree/hal:executable_format",
],
)
-
-cc_library(
- name = "memref_runtime",
- hdrs = [
- "memref_runtime.h",
- ],
-)
diff --git a/iree/hal/dylib/CMakeLists.txt b/iree/hal/dylib/CMakeLists.txt
index 7644d92..d720435 100644
--- a/iree/hal/dylib/CMakeLists.txt
+++ b/iree/hal/dylib/CMakeLists.txt
@@ -65,7 +65,6 @@
SRCS
"dylib_executable.cc"
DEPS
- ::memref_runtime
absl::inlined_vector
absl::span
flatbuffers
@@ -97,11 +96,3 @@
iree::hal::executable_format
PUBLIC
)
-
-iree_cc_library(
- NAME
- memref_runtime
- HDRS
- "memref_runtime.h"
- PUBLIC
-)
diff --git a/iree/hal/dylib/dylib_executable.cc b/iree/hal/dylib/dylib_executable.cc
index e06bb19..e58a003 100644
--- a/iree/hal/dylib/dylib_executable.cc
+++ b/iree/hal/dylib/dylib_executable.cc
@@ -17,7 +17,6 @@
#include "flatbuffers/flatbuffers.h"
#include "iree/base/file_io.h"
#include "iree/base/tracing.h"
-#include "iree/hal/dylib/memref_runtime.h"
#include "iree/schemas/dylib_executable_def_generated.h"
namespace iree {
@@ -96,15 +95,9 @@
struct DyLibDispatchState : public HostExecutable::DispatchState {
DyLibDispatchState() = default;
- ~DyLibDispatchState() override {
- for (int i = 0; i < descriptors.size(); ++i) {
- freeUnrankedDescriptor(descriptors[i]);
- }
- }
-
void* entry_function = nullptr;
- absl::InlinedVector<UnrankedMemRefType<uint32_t>*, 4> descriptors;
absl::InlinedVector<void*, 4> args;
+ absl::InlinedVector<int64_t, 4> push_constant;
};
StatusOr<ref_ptr<HostExecutable::DispatchState>>
@@ -127,17 +120,14 @@
MemoryAccessBitfield::kWrite,
io_binding.offset, io_binding.length));
auto data = memory.mutable_data();
- auto descriptor = allocUnrankedDescriptor<uint32_t>(data);
- dispatch_state->descriptors.push_back(descriptor);
- dispatch_state->args.push_back(&descriptor->descriptor);
+
+ dispatch_state->args.push_back(data);
}
}
-
- auto push_constants_descriptor = allocUnrankedDescriptor<uint32_t>(
- const_cast<uint32_t*>(params.push_constants->values.data()),
- {static_cast<int64_t>(params.push_constants->values.size())});
- dispatch_state->descriptors.push_back(push_constants_descriptor);
- dispatch_state->args.push_back(&push_constants_descriptor->descriptor);
+ // TODO(ataei): Consider moving this casting to the codegen side.
+ for (int i = 0; i < params.push_constants->values.size(); ++i) {
+ dispatch_state->push_constant.push_back(params.push_constants->values[i]);
+ }
return std::move(dispatch_state);
}
@@ -147,8 +137,10 @@
IREE_TRACE_SCOPE0("DyLibExecutable::DispatchTile");
auto* dispatch_state = static_cast<DyLibDispatchState*>(state);
- auto entry_function = (void (*)(void**))dispatch_state->entry_function;
- entry_function(dispatch_state->args.data());
+ auto entry_function =
+ (void (*)(void**, int64_t*))dispatch_state->entry_function;
+ entry_function(dispatch_state->args.data(),
+ dispatch_state->push_constant.data());
return OkStatus();
}
diff --git a/iree/hal/dylib/memref_runtime.h b/iree/hal/dylib/memref_runtime.h
deleted file mode 100644
index 50d3987..0000000
--- a/iree/hal/dylib/memref_runtime.h
+++ /dev/null
@@ -1,177 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#ifndef IREE_HAL_DYLIB_MEMREF_RUNTIME_H_
-#define IREE_HAL_DYLIB_MEMREF_RUNTIME_H_
-
-#include <assert.h>
-
-#include <cstdint>
-#include <vector>
-
-namespace iree {
-namespace hal {
-namespace dylib {
-
-template <int N>
-void dropFront(int64_t arr[N], int64_t *res) {
- for (unsigned i = 1; i < N; ++i) *(res + i - 1) = arr[i];
-}
-
-/// StridedMemRef descriptor type with static rank.
-template <typename T, int N>
-struct StridedMemRefType {
- T *basePtr;
- T *data;
- int64_t offset;
- int64_t sizes[N];
- int64_t strides[N];
- // This operator[] is extremely slow and only for sugaring purposes.
- StridedMemRefType<T, N - 1> operator[](int64_t idx) {
- StridedMemRefType<T, N - 1> res;
- res.basePtr = basePtr;
- res.data = data;
- res.offset = offset + idx * strides[0];
- dropFront<N>(sizes, res.sizes);
- dropFront<N>(strides, res.strides);
- return res;
- }
-};
-
-/// StridedMemRef descriptor type specialized for rank 1.
-template <typename T>
-struct StridedMemRefType<T, 1> {
- T *basePtr;
- T *data;
- int64_t offset;
- int64_t sizes[1];
- int64_t strides[1];
- T &operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
-};
-
-/// StridedMemRef descriptor type specialized for rank 0.
-template <typename T>
-struct StridedMemRefType<T, 0> {
- T *basePtr;
- T *data;
- int64_t offset;
-};
-
-// Unranked MemRef
-template <typename T>
-struct UnrankedMemRefType {
- int64_t rank;
- void *descriptor;
-};
-
-// Given a shape with sizes greater than 0 along all dimensions,
-// returns the distance, in number of elements, between a slice in a dimension
-// and the next slice in the same dimension.
-// e.g. shape[3, 4, 5] -> strides[20, 5, 1]
-inline std::vector<int64_t> makeStrides(const std::vector<int64_t> &shape) {
- std::vector<int64_t> tmp;
- if (shape.empty()) return tmp;
- tmp.reserve(shape.size());
- int64_t running = 1;
- for (auto rit = shape.rbegin(), reit = shape.rend(); rit != reit; ++rit) {
- assert(*rit > 0 &&
- "size must be greater than 0 along all dimensions of shape");
- tmp.push_back(running);
- running *= *rit;
- }
- return std::vector<int64_t>(tmp.rbegin(), tmp.rend());
-}
-
-// Mallocs a StridedMemRefDescriptor<T, N>* that matches the MLIR ABI.
-// This is an implementation detail that is kept in sync with MLIR codegen
-// conventions.
-template <typename T, int N>
-StridedMemRefType<T, N> *makeStridedMemRefDescriptor(
- void *ptr, const std::vector<int64_t> &shape) {
- StridedMemRefType<T, N> *descriptor = static_cast<StridedMemRefType<T, N> *>(
- malloc(sizeof(StridedMemRefType<T, N>)));
- descriptor->basePtr = static_cast<T *>(ptr);
- descriptor->data = static_cast<T *>(ptr);
- descriptor->offset = 0;
- std::copy(shape.begin(), shape.end(), descriptor->sizes);
- auto strides = makeStrides(shape);
- std::copy(strides.begin(), strides.end(), descriptor->strides);
- return descriptor;
-}
-
-// Mallocs a StridedMemRefDescriptor<T, 0>* (i.e. a pointer to scalar) that
-// matches the MLIR ABI. This is an implementation detail that is kept in sync
-// with MLIR codegen conventions.
-template <typename T>
-StridedMemRefType<T, 0> *makeStridedMemRefDescriptor(
- void *ptr, const std::vector<int64_t> &shape) {
- StridedMemRefType<T, 0> *descriptor = static_cast<StridedMemRefType<T, 0> *>(
- malloc(sizeof(StridedMemRefType<T, 0>)));
- descriptor->basePtr = static_cast<T *>(ptr);
- descriptor->data = static_cast<T *>(ptr);
- descriptor->offset = 0;
- return descriptor;
-}
-
-// Mallocs an UnrankedMemRefType<T>* that contains a ranked
-// StridedMemRefDescriptor<T, Rank>* and matches the MLIR ABI. This is an
-// implementation detail that is kept in sync with MLIR codegen conventions.
-template <typename T>
-UnrankedMemRefType<T> *allocUnrankedDescriptor(
- void *data, const std::vector<int64_t> &shape) {
- UnrankedMemRefType<T> *res = static_cast<UnrankedMemRefType<T> *>(
- malloc(sizeof(UnrankedMemRefType<T>)));
- res->rank = shape.size();
- if (res->rank == 0)
- res->descriptor = makeStridedMemRefDescriptor<T>(data, shape);
- else if (res->rank == 1)
- res->descriptor = makeStridedMemRefDescriptor<T, 1>(data, shape);
- else if (res->rank == 2)
- res->descriptor = makeStridedMemRefDescriptor<T, 2>(data, shape);
- else if (res->rank == 3)
- res->descriptor = makeStridedMemRefDescriptor<T, 3>(data, shape);
- else if (res->rank == 4)
- res->descriptor = makeStridedMemRefDescriptor<T, 4>(data, shape);
- else if (res->rank == 5)
- res->descriptor = makeStridedMemRefDescriptor<T, 5>(data, shape);
- else if (res->rank == 6)
- res->descriptor = makeStridedMemRefDescriptor<T, 6>(data, shape);
- else
- assert(false && "Unsupported 6+D memref descriptor");
- return res;
-}
-
-// Shape and strides aren't used in the generated code (yet).
-// TODO(ataei): Delete this version once we can pass shapes.
-template <typename T>
-UnrankedMemRefType<T> *allocUnrankedDescriptor(void *data) {
- UnrankedMemRefType<T> *res = static_cast<UnrankedMemRefType<T> *>(
- malloc(sizeof(UnrankedMemRefType<T>)));
- res->descriptor = makeStridedMemRefDescriptor<T>(data, {});
- return res;
-}
-
-// Frees an UnrankedMemRefType<T>*
-template <typename T>
-void freeUnrankedDescriptor(UnrankedMemRefType<T> *desc) {
- free(desc->descriptor);
- free(desc);
-}
-
-} // namespace dylib
-} // namespace hal
-} // namespace iree
-
-#endif // IREE_HAL_DYLIB_MEMREF_RUNTIME_H_
diff --git a/iree/hal/llvmjit/BUILD b/iree/hal/llvmjit/BUILD
index 088bb8b..3ebd609 100644
--- a/iree/hal/llvmjit/BUILD
+++ b/iree/hal/llvmjit/BUILD
@@ -64,7 +64,6 @@
srcs = ["llvmjit_executable.cc"],
hdrs = ["llvmjit_executable.h"],
deps = [
- ":memref_runtime",
"//iree/base:status",
"//iree/base:tracing",
"//iree/hal:buffer",
@@ -95,10 +94,3 @@
"//iree/hal:executable_format",
],
)
-
-cc_library(
- name = "memref_runtime",
- hdrs = [
- "memref_runtime.h",
- ],
-)
diff --git a/iree/hal/llvmjit/CMakeLists.txt b/iree/hal/llvmjit/CMakeLists.txt
index 8418745..ca40941 100644
--- a/iree/hal/llvmjit/CMakeLists.txt
+++ b/iree/hal/llvmjit/CMakeLists.txt
@@ -68,7 +68,6 @@
SRCS
"llvmjit_executable.cc"
DEPS
- ::memref_runtime
LLVMAsmParser
LLVMCore
LLVMOrcJIT
@@ -102,11 +101,3 @@
iree::hal::executable_format
PUBLIC
)
-
-iree_cc_library(
- NAME
- memref_runtime
- HDRS
- "memref_runtime.h"
- PUBLIC
-)
diff --git a/iree/hal/llvmjit/llvmjit_executable.cc b/iree/hal/llvmjit/llvmjit_executable.cc
index 1596b9e..7d26ccd 100644
--- a/iree/hal/llvmjit/llvmjit_executable.cc
+++ b/iree/hal/llvmjit/llvmjit_executable.cc
@@ -21,7 +21,6 @@
#include "iree/base/tracing.h"
#include "iree/hal/buffer.h"
#include "iree/hal/executable.h"
-#include "iree/hal/llvmjit/memref_runtime.h"
#include "iree/schemas/llvmir_executable_def_generated.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@@ -82,13 +81,11 @@
make_ref<LLVMJITExecutable>(spec, std::move(ll_jit), allow_aliasing_data);
for (const auto func_name : *entry_points) {
- auto func_symbol =
- executable->ll_jit_->lookup("invoke_" + func_name->str());
+ auto func_symbol = executable->ll_jit_->lookup(func_name->str());
if (!func_symbol) {
return NotFoundErrorBuilder(IREE_LOC)
<< "Can't JIT compile function : " << func_name;
}
- // Map function to its invoke_ symbol.
executable->symbols_.push_back(func_symbol.get());
}
@@ -111,15 +108,10 @@
struct LLVMJITDispatchState : public HostExecutable::DispatchState {
LLVMJITDispatchState() = default;
- ~LLVMJITDispatchState() override {
- for (int i = 0; i < descriptors.size(); ++i) {
- freeUnrankedDescriptor(descriptors[i]);
- }
- }
llvm::JITEvaluatedSymbol symbol;
- llvm::SmallVector<UnrankedMemRefType<uint32_t>*, 4> descriptors;
llvm::SmallVector<void*, 4> args;
+ llvm::SmallVector<int64_t, 4> push_constant;
};
StatusOr<ref_ptr<HostExecutable::DispatchState>>
@@ -142,17 +134,13 @@
MemoryAccessBitfield::kWrite,
io_binding.offset, io_binding.length));
auto data = memory.mutable_data();
- auto descriptor = allocUnrankedDescriptor<uint32_t>(data);
- dispatch_state->descriptors.push_back(descriptor);
- dispatch_state->args.push_back(&descriptor->descriptor);
+ dispatch_state->args.push_back(data);
}
}
-
- auto push_constants_descriptor = allocUnrankedDescriptor<uint32_t>(
- const_cast<uint32_t*>(params.push_constants->values.data()),
- {static_cast<int64_t>(params.push_constants->values.size())});
- dispatch_state->descriptors.push_back(push_constants_descriptor);
- dispatch_state->args.push_back(&push_constants_descriptor->descriptor);
+ // TODO(ataei): Consider moving this casting to the codegen side.
+ for (int i = 0; i < params.push_constants->values.size(); ++i) {
+ dispatch_state->push_constant.push_back(params.push_constants->values[i]);
+ }
return std::move(dispatch_state);
}
@@ -162,8 +150,9 @@
IREE_TRACE_SCOPE0("LLVMJITExecutable::DispatchTile");
auto* dispatch_state = static_cast<LLVMJITDispatchState*>(state);
- auto func_ptr = (void (*)(void**))dispatch_state->symbol.getAddress();
- func_ptr(dispatch_state->args.data());
+ auto func_ptr =
+ (void (*)(void**, int64_t*))dispatch_state->symbol.getAddress();
+ func_ptr(dispatch_state->args.data(), dispatch_state->push_constant.data());
return OkStatus();
}
diff --git a/iree/hal/llvmjit/memref_runtime.h b/iree/hal/llvmjit/memref_runtime.h
deleted file mode 100644
index 6b94410..0000000
--- a/iree/hal/llvmjit/memref_runtime.h
+++ /dev/null
@@ -1,177 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#ifndef IREE_HAL_LLVMJIT_LLVMJIT_MEMREF_RUNTIME_H_
-#define IREE_HAL_LLVMJIT_LLVMJIT_MEMREF_RUNTIME_H_
-
-#include <assert.h>
-
-#include <cstdint>
-#include <vector>
-
-namespace iree {
-namespace hal {
-namespace llvmjit {
-
-template <int N>
-void dropFront(int64_t arr[N], int64_t *res) {
- for (unsigned i = 1; i < N; ++i) *(res + i - 1) = arr[i];
-}
-
-/// StridedMemRef descriptor type with static rank.
-template <typename T, int N>
-struct StridedMemRefType {
- T *basePtr;
- T *data;
- int64_t offset;
- int64_t sizes[N];
- int64_t strides[N];
- // This operator[] is extremely slow and only for sugaring purposes.
- StridedMemRefType<T, N - 1> operator[](int64_t idx) {
- StridedMemRefType<T, N - 1> res;
- res.basePtr = basePtr;
- res.data = data;
- res.offset = offset + idx * strides[0];
- dropFront<N>(sizes, res.sizes);
- dropFront<N>(strides, res.strides);
- return res;
- }
-};
-
-/// StridedMemRef descriptor type specialized for rank 1.
-template <typename T>
-struct StridedMemRefType<T, 1> {
- T *basePtr;
- T *data;
- int64_t offset;
- int64_t sizes[1];
- int64_t strides[1];
- T &operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
-};
-
-/// StridedMemRef descriptor type specialized for rank 0.
-template <typename T>
-struct StridedMemRefType<T, 0> {
- T *basePtr;
- T *data;
- int64_t offset;
-};
-
-// Unranked MemRef
-template <typename T>
-struct UnrankedMemRefType {
- int64_t rank;
- void *descriptor;
-};
-
-// Given a shape with sizes greater than 0 along all dimensions,
-// returns the distance, in number of elements, between a slice in a dimension
-// and the next slice in the same dimension.
-// e.g. shape[3, 4, 5] -> strides[20, 5, 1]
-inline std::vector<int64_t> makeStrides(const std::vector<int64_t> &shape) {
- std::vector<int64_t> tmp;
- if (shape.empty()) return tmp;
- tmp.reserve(shape.size());
- int64_t running = 1;
- for (auto rit = shape.rbegin(), reit = shape.rend(); rit != reit; ++rit) {
- assert(*rit > 0 &&
- "size must be greater than 0 along all dimensions of shape");
- tmp.push_back(running);
- running *= *rit;
- }
- return std::vector<int64_t>(tmp.rbegin(), tmp.rend());
-}
-
-// Mallocs a StridedMemRefDescriptor<T, N>* that matches the MLIR ABI.
-// This is an implementation detail that is kept in sync with MLIR codegen
-// conventions.
-template <typename T, int N>
-StridedMemRefType<T, N> *makeStridedMemRefDescriptor(
- void *ptr, const std::vector<int64_t> &shape) {
- StridedMemRefType<T, N> *descriptor = static_cast<StridedMemRefType<T, N> *>(
- malloc(sizeof(StridedMemRefType<T, N>)));
- descriptor->basePtr = static_cast<T *>(ptr);
- descriptor->data = static_cast<T *>(ptr);
- descriptor->offset = 0;
- std::copy(shape.begin(), shape.end(), descriptor->sizes);
- auto strides = makeStrides(shape);
- std::copy(strides.begin(), strides.end(), descriptor->strides);
- return descriptor;
-}
-
-// Mallocs a StridedMemRefDescriptor<T, 0>* (i.e. a pointer to scalar) that
-// matches the MLIR ABI. This is an implementation detail that is kept in sync
-// with MLIR codegen conventions.
-template <typename T>
-StridedMemRefType<T, 0> *makeStridedMemRefDescriptor(
- void *ptr, const std::vector<int64_t> &shape) {
- StridedMemRefType<T, 0> *descriptor = static_cast<StridedMemRefType<T, 0> *>(
- malloc(sizeof(StridedMemRefType<T, 0>)));
- descriptor->basePtr = static_cast<T *>(ptr);
- descriptor->data = static_cast<T *>(ptr);
- descriptor->offset = 0;
- return descriptor;
-}
-
-// Mallocs an UnrankedMemRefType<T>* that contains a ranked
-// StridedMemRefDescriptor<T, Rank>* and matches the MLIR ABI. This is an
-// implementation detail that is kept in sync with MLIR codegen conventions.
-template <typename T>
-UnrankedMemRefType<T> *allocUnrankedDescriptor(
- void *data, const std::vector<int64_t> &shape) {
- UnrankedMemRefType<T> *res = static_cast<UnrankedMemRefType<T> *>(
- malloc(sizeof(UnrankedMemRefType<T>)));
- res->rank = shape.size();
- if (res->rank == 0)
- res->descriptor = makeStridedMemRefDescriptor<T>(data, shape);
- else if (res->rank == 1)
- res->descriptor = makeStridedMemRefDescriptor<T, 1>(data, shape);
- else if (res->rank == 2)
- res->descriptor = makeStridedMemRefDescriptor<T, 2>(data, shape);
- else if (res->rank == 3)
- res->descriptor = makeStridedMemRefDescriptor<T, 3>(data, shape);
- else if (res->rank == 4)
- res->descriptor = makeStridedMemRefDescriptor<T, 4>(data, shape);
- else if (res->rank == 5)
- res->descriptor = makeStridedMemRefDescriptor<T, 5>(data, shape);
- else if (res->rank == 6)
- res->descriptor = makeStridedMemRefDescriptor<T, 6>(data, shape);
- else
- assert(false && "Unsupported 6+D memref descriptor");
- return res;
-}
-
-// Shape and strides aren't used in the generated code (yet).
-// TODO(ataei): Delete this version once we can pass shapes.
-template <typename T>
-UnrankedMemRefType<T> *allocUnrankedDescriptor(void *data) {
- UnrankedMemRefType<T> *res = static_cast<UnrankedMemRefType<T> *>(
- malloc(sizeof(UnrankedMemRefType<T>)));
- res->descriptor = makeStridedMemRefDescriptor<T>(data, {});
- return res;
-}
-
-// Frees an UnrankedMemRefType<T>*
-template <typename T>
-void freeUnrankedDescriptor(UnrankedMemRefType<T> *desc) {
- free(desc->descriptor);
- free(desc);
-}
-
-} // namespace llvmjit
-} // namespace hal
-} // namespace iree
-
-#endif // IREE_HAL_LLVMJIT_LLVMJIT_MEMREF_RUNTIME_H_
diff --git a/iree/modules/hal/hal_module.cc b/iree/modules/hal/hal_module.cc
index 2b1d6db..e51938f 100644
--- a/iree/modules/hal/hal_module.cc
+++ b/iree/modules/hal/hal_module.cc
@@ -457,6 +457,30 @@
return BufferViewDimsN<4>(std::move(buffer_view));
}
+ Status BufferViewTrace(
+ absl::Span<const vm::ref<iree_hal_buffer_view_t>> buffer_views) {
+ // TODO(hanchung): Have better information for each dump, e.g., having StrAttr
+ // for each trace event so we can map the dump to dispatch functions easier.
+ fprintf(stderr, "=== DEBUG DUMP ===\n");
+ for (auto& view : buffer_views) {
+ std::string result_str(4096, '\0');
+ iree_status_t status;
+ do {
+ iree_host_size_t actual_length = 0;
+ status = iree_hal_buffer_view_format(
+ view.get(), /*max_element_count=*/1024, result_str.size() + 1,
+ &result_str[0], &actual_length);
+ result_str.resize(actual_length);
+ } while (iree_status_is_out_of_range(status));
+ if (!iree_status_is_ok(status)) {
+ return FromApiStatus(status, IREE_LOC);
+ }
+ fprintf(stderr, "%s\n", result_str.c_str());
+ }
+ fprintf(stderr, "\n");
+ return OkStatus();
+ }
+
//===--------------------------------------------------------------------===//
// iree::hal::CommandBuffer
//===--------------------------------------------------------------------===//
@@ -876,6 +900,8 @@
&HALModuleState::BufferViewDims3),
vm::MakeNativeFunction("buffer_view.dims.4",
&HALModuleState::BufferViewDims4),
+ vm::MakeNativeFunction("buffer_view.trace",
+ &HALModuleState::BufferViewTrace),
vm::MakeNativeFunction("command_buffer.create",
&HALModuleState::CommandBufferCreate),
diff --git a/iree/samples/simple_embedding/simple_embedding_test.cc b/iree/samples/simple_embedding/simple_embedding_test.cc
index c99b6d7..489ac33 100644
--- a/iree/samples/simple_embedding/simple_embedding_test.cc
+++ b/iree/samples/simple_embedding/simple_embedding_test.cc
@@ -183,8 +183,8 @@
ASSERT_API_OK(iree_hal_buffer_unmap(ret_buffer, &mapped_memory));
LOG(INFO) << "Results match!";
- iree_vm_list_deinitialize(inputs.get());
- iree_vm_list_deinitialize(outputs.get());
+ inputs.reset();
+ outputs.reset();
iree_hal_device_release(device);
iree_vm_context_release(context);
iree_vm_instance_release(instance);
diff --git a/iree/samples/vulkan/BUILD b/iree/samples/vulkan/BUILD
index b684de7..8278b40 100644
--- a/iree/samples/vulkan/BUILD
+++ b/iree/samples/vulkan/BUILD
@@ -57,6 +57,7 @@
"//iree/vm:bytecode_module",
"//iree/vm:ref_cc",
"@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/types:span",
"@dear_imgui",
"@dear_imgui//:imgui_sdl_vulkan",
"@iree_vulkan_headers//:vulkan_headers_no_prototypes",
diff --git a/iree/schemas/bytecode_module_def.fbs b/iree/schemas/bytecode_module_def.fbs
index 6eff279..f971093 100644
--- a/iree/schemas/bytecode_module_def.fbs
+++ b/iree/schemas/bytecode_module_def.fbs
@@ -43,7 +43,7 @@
// Function level reflection attributes.
// These are typically used to communicate additional ABI metadata needed
// for dynamic invocation and host language mapping.
- // See: docs/function_abi.md
+ // See: docs/design_docs/function_abi.md
reflection_attrs:[ReflectionAttrDef];
}
diff --git a/iree/test/e2e/regression/dynamic_torch_index_select_high_rank.mlir b/iree/test/e2e/regression/dynamic_torch_index_select_high_rank.mlir
new file mode 100644
index 0000000..ce42e31
--- /dev/null
+++ b/iree/test/e2e/regression/dynamic_torch_index_select_high_rank.mlir
@@ -0,0 +1,58 @@
+// RUN: iree-run-mlir %s -iree-hal-target-backends=llvm-ir -input-value="2x2xi32=[6, 7] [8, 9]" -input-value="2x2x2x2xi32=[[[0, 1] [1, 0]] [[0, 0] [1, 1]]] [[[1, 1] [0, 0]] [[0, 1] [1, 0]]]" | IreeFileCheck %s
+
+// CHECK-LABEL: EXEC @torch_index_select1
+func @torch_index_select1(%arg0: tensor<?x?xi32>, %arg1: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 1 : i64, dim = 1 : i64} : (tensor<?x?xi32>, tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
+ return %0 : tensor<?x?x?x?xi32>
+}
+
+// CHECK: 2x2x2x2xi32=[
+// CHECK-SAME: [
+// CHECK-SAME: [6 7][7 6]
+// CHECK-SAME: ][
+// CHECK-SAME: [6 6][7 7]
+// CHECK-SAME: ]
+// CHECK-SAME: ][
+// CHECK-SAME: [
+// CHECK-SAME: [9 9][8 8]
+// CHECK-SAME: ][
+// CHECK-SAME: [8 9][9 8]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+
+// CHECK-LABEL: EXEC @torch_index_select2
+func @torch_index_select2(%arg0: tensor<?x?xi32>, %arg1: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?xi32>, tensor<?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32>
+ return %0 : tensor<?x?x?x?x?xi32>
+}
+
+// CHECK: 2x2x2x2x2xi32=[
+// CHECK-SAME: [
+// CHECK-SAME: [
+// CHECK-SAME: [6 7][8 9]
+// CHECK-SAME: ][
+// CHECK-SAME: [8 9][6 7]
+// CHECK-SAME: ]
+// CHECK-SAME: ][
+// CHECK-SAME: [
+// CHECK-SAME: [6 7][6 7]
+// CHECK-SAME: ][
+// CHECK-SAME: [8 9][8 9]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+// CHECK-SAME: ][
+// CHECK-SAME: [
+// CHECK-SAME: [
+// CHECK-SAME: [8 9][8 9]
+// CHECK-SAME: ][
+// CHECK-SAME: [6 7][6 7]
+// CHECK-SAME: ]
+// CHECK-SAME: ][
+// CHECK-SAME: [
+// CHECK-SAME: [6 7][8 9]
+// CHECK-SAME: ][
+// CHECK-SAME: [8 9][6 7]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+
diff --git a/iree/test/e2e/regression/dynamic_torch_index_select_negative.mlir b/iree/test/e2e/regression/dynamic_torch_index_select_negative.mlir
new file mode 100644
index 0000000..0755b5c
--- /dev/null
+++ b/iree/test/e2e/regression/dynamic_torch_index_select_negative.mlir
@@ -0,0 +1,14 @@
+// RUN: iree-run-mlir %s -iree-hal-target-backends=llvm-ir -input-value="2x2x2xi32=[[100, 101] [110, 111]] [[200, 201] [210, 211]]" -input-value="2x2x2xi32=[[0, 1] [1, 0]] [[0, 0] [1, 1]]" | IreeFileCheck %s
+
+// CHECK-LABEL: EXEC @torch_index_select1
+func @torch_index_select1(%arg0: tensor<?x?x?xi32>, %arg1: tensor<?x?x?xi32>) -> tensor<?x?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = -1 : i64, dim = -1 : i64} : (tensor<?x?x?xi32>, tensor<?x?x?xi32>) -> tensor<?x?x?xi32>
+ return %0 : tensor<?x?x?xi32>
+}
+
+// CHECK: 2x2x2xi32=[
+// CHECK-SAME: [100 101][111 110]
+// CHECK-SAME: ][
+// CHECK-SAME: [200 200][211 211]
+// CHECK-SAME: ]
+
diff --git a/iree/test/e2e/regression/dynamic_torch_index_select_scalar.mlir b/iree/test/e2e/regression/dynamic_torch_index_select_scalar.mlir
new file mode 100644
index 0000000..8ccb4fa
--- /dev/null
+++ b/iree/test/e2e/regression/dynamic_torch_index_select_scalar.mlir
@@ -0,0 +1,17 @@
+// RUN: iree-run-mlir %s -iree-hal-target-backends=llvm-ir -input-value="5x1x5xi32=[[1,2,3,4,5]] [[6,7,8,9,10]] [[11,12,13,14,15]] [[16,17,18,19,20]] [[21,22,23,24,25]]" -input-value="i32=0" | IreeFileCheck %s
+
+// CHECK-LABEL: EXEC @torch_index_select1
+func @torch_index_select1(%arg0: tensor<?x?x?xi32>, %arg1: tensor<i32>) -> tensor<?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?x?xi32>, tensor<i32>) -> tensor<?x?xi32>
+ return %0 : tensor<?x?xi32>
+}
+
+// CHECK: 1x5xi32=[1 2 3 4 5]
+
+// CHECK-LABEL: EXEC @torch_index_select2
+func @torch_index_select2(%arg0: tensor<?x?x?xi32>, %arg1: tensor<i32>) -> tensor<?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 0 : i64, dim = 1 : i64} : (tensor<?x?x?xi32>, tensor<i32>) -> tensor<?x?xi32>
+ return %0 : tensor<?x?xi32>
+}
+
+// CHECK: 5x5xi32=[1 2 3 4 5][6 7 8 9 10][11 12 13 14 15][16 17 18 19 20][21 22 23 24 25]
diff --git a/iree/test/e2e/regression/dynamic_torch_index_select_vector.mlir b/iree/test/e2e/regression/dynamic_torch_index_select_vector.mlir
new file mode 100644
index 0000000..d22ccc7
--- /dev/null
+++ b/iree/test/e2e/regression/dynamic_torch_index_select_vector.mlir
@@ -0,0 +1,28 @@
+// RUN: iree-run-mlir %s -iree-hal-target-backends=llvm-ir -input-value="3x2x2xi32=[[1, 2] [3, 4]] [[5, 6] [7, 8]] [[9, 10] [11, 12]]" -input-value="2xi32=[0, 1]" | IreeFileCheck %s
+
+// CHECK-LABEL: EXEC @torch_index_select1
+func @torch_index_select1(%arg0: tensor<?x?x?xi32>, %arg1: tensor<?xi32>) -> tensor<?x?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 0 : i64, dim = 1 : i64} : (tensor<?x?x?xi32>, tensor<?xi32>) -> tensor<?x?x?xi32>
+ return %0 : tensor<?x?x?xi32>
+}
+
+// CHECK: 3x2x2xi32=[
+// CHECK-SAME: [1 2][3 4]
+// CHECK-SAME: ][
+// CHECK-SAME: [5 6][7 8]
+// CHECK-SAME: ][
+// CHECK-SAME: [9 10][11 12]
+// CHECK-SAME: ]
+
+// CHECK-LABEL: EXEC @torch_index_select2
+func @torch_index_select2(%arg0: tensor<?x?x?xi32>, %arg1: tensor<?xi32>) -> tensor<?x?x?xi32> attributes {iree.module.export} {
+ %0 = "mhlo.torch_index_select"(%arg0, %arg1) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?x?xi32>, tensor<?xi32>) -> tensor<?x?x?xi32>
+ return %0 : tensor<?x?x?xi32>
+}
+
+// CHECK: 2x2x2xi32=[
+// CHECK-SAME: [1 2][3 4]
+// CHECK-SAME: ][
+// CHECK-SAME: [5 6][7 8]
+// CHECK-SAME: ]
+
diff --git a/iree/test/e2e/vulkan_specific/BUILD b/iree/test/e2e/vulkan_specific/BUILD
index 0565a86..4d20c33 100644
--- a/iree/test/e2e/vulkan_specific/BUILD
+++ b/iree/test/e2e/vulkan_specific/BUILD
@@ -29,38 +29,3 @@
driver = "vulkan",
target_backend = "vulkan-spirv",
)
-
-# TODO(#2345): Merge two tests into one single file.
-iree_check_single_backend_test_suite(
- name = "check_vulkan-spirv-split-pad-conv_vulkan",
- srcs = [
- "convolution1.mlir",
- "convolution2.mlir",
- ],
- driver = "vulkan",
- target_backend = "vulkan-spirv",
-)
-
-# TODO(#2345): Merge two tests into one single file.
-iree_check_single_backend_test_suite(
- name = "check_vulkan-spirv-nosplit-pad-conv_vulkan",
- srcs = [
- "convolution1.mlir",
- "convolution2.mlir",
- ],
- compiler_flags = ["-iree-extract-pad-from-conv=false"],
- driver = "vulkan",
- target_backend = "vulkan-spirv",
-)
-
-# TODO(#2345): Merge two tests into one single file.
-iree_check_single_backend_test_suite(
- name = "check_vulkan-spirv-conv-nocontrol_vulkan",
- srcs = [
- "convolution1.mlir",
- "convolution2.mlir",
- ],
- compiler_flags = ["-iree-codegen-use-legacy-conv-lowering=false"],
- driver = "vulkan",
- target_backend = "vulkan-spirv",
-)
diff --git a/iree/test/e2e/vulkan_specific/CMakeLists.txt b/iree/test/e2e/vulkan_specific/CMakeLists.txt
index cca6c58..32ee021 100644
--- a/iree/test/e2e/vulkan_specific/CMakeLists.txt
+++ b/iree/test/e2e/vulkan_specific/CMakeLists.txt
@@ -25,43 +25,3 @@
DRIVER
vulkan
)
-
-iree_check_single_backend_test_suite(
- NAME
- check_vulkan-spirv-split-pad-conv_vulkan
- SRCS
- "convolution1.mlir"
- "convolution2.mlir"
- TARGET_BACKEND
- vulkan-spirv
- DRIVER
- vulkan
-)
-
-iree_check_single_backend_test_suite(
- NAME
- check_vulkan-spirv-nosplit-pad-conv_vulkan
- SRCS
- "convolution1.mlir"
- "convolution2.mlir"
- TARGET_BACKEND
- vulkan-spirv
- DRIVER
- vulkan
- COMPILER_FLAGS
- "-iree-extract-pad-from-conv=false"
-)
-
-iree_check_single_backend_test_suite(
- NAME
- check_vulkan-spirv-conv-nocontrol_vulkan
- SRCS
- "convolution1.mlir"
- "convolution2.mlir"
- TARGET_BACKEND
- vulkan-spirv
- DRIVER
- vulkan
- COMPILER_FLAGS
- "-iree-codegen-use-legacy-conv-lowering=false"
-)
diff --git a/iree/test/e2e/vulkan_specific/convolution1.mlir b/iree/test/e2e/vulkan_specific/convolution1.mlir
deleted file mode 100644
index d0fc606..0000000
--- a/iree/test/e2e/vulkan_specific/convolution1.mlir
+++ /dev/null
@@ -1,66 +0,0 @@
-func @conv2d_nopadding() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0], [ 9.0, 10.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0], [39.0, 40.0]]]]> : tensor<1x4x5x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x5x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[1310.0],[1466.0],[1622.0]],
- [[2090.0],[2246.0],[2402.0]]
- ]]> : tensor<1x2x3x1xf32>) : tensor<1x2x3x1xf32>
- return
-}
-
-func @conv2d_1452x3221_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0], [ 9.0, 10.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0], [39.0, 40.0]]]]> : tensor<1x4x5x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<1x4x5x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[ 600.0], [ 736.0], [ 872.0], [1008.0], [ 476.0]],
- [[1310.0], [1466.0], [1622.0], [1778.0], [ 805.0]],
- [[2090.0], [2246.0], [2402.0], [2558.0], [1135.0]],
- [[1080.0], [1152.0], [1224.0], [1296.0], [ 524.0]]]]> : tensor<1x4x5x1xf32>) : tensor<1x4x5x1xf32>
- return
-}
diff --git a/iree/test/e2e/vulkan_specific/convolution2.mlir b/iree/test/e2e/vulkan_specific/convolution2.mlir
deleted file mode 100644
index ce88d5d..0000000
--- a/iree/test/e2e/vulkan_specific/convolution2.mlir
+++ /dev/null
@@ -1,140 +0,0 @@
-func @conv2d_2451x2311_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
- [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
- [[11.0], [12.0], [13.0], [14.0], [15.0]],
- [[16.0], [17.0], [18.0], [19.0], [20.0]]],
- [[[21.0], [22.0], [23.0], [24.0], [25.0]],
- [[26.0], [27.0], [28.0], [29.0], [30.0]],
- [[31.0], [32.0], [33.0], [34.0], [35.0]],
- [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[1.0]], [[2.0]], [[3.0]]],
- [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
- [[160.0], [226.0], [247.0], [268.0], [160.0]],
- [[240.0], [331.0], [352.0], [373.0], [220.0]],
- [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
- [[[400.0], [541.0], [562.0], [583.0], [340.0]],
- [[480.0], [646.0], [667.0], [688.0], [400.0]],
- [[560.0], [751.0], [772.0], [793.0], [460.0]],
- [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
- return
-}
-
-func @conv2d_no_padding() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0],
- [ 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0],
- [ 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0]],
- [[ 16.0, 17.0, 18.0],
- [ 19.0, 20.0, 21.0],
- [ 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0],
- [ 28.0, 29.0, 30.0]],
- [[ 31.0, 32.0, 33.0],
- [ 34.0, 35.0, 36.0],
- [ 37.0, 38.0, 39.0],
- [ 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0]],
- [[ 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0],
- [ 52.0, 53.0, 54.0],
- [ 55.0, 56.0, 57.0],
- [ 58.0, 59.0, 60.0]]],
- [[[ 61.0, 62.0, 63.0],
- [ 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0],
- [ 70.0, 71.0, 72.0],
- [ 73.0, 74.0, 75.0]],
- [[ 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0],
- [ 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0],
- [ 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0],
- [ 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0],
- [100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0]],
- [[106.0, 107.0, 108.0],
- [109.0, 110.0, 111.0],
- [112.0, 113.0, 114.0],
- [115.0, 116.0, 117.0],
- [118.0, 119.0, 120.0]]]]> : tensor<2x4x5x3xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]],
- [[ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0, 28.0, 29.0, 30.0],
- [ 31.0, 32.0, 33.0, 34.0, 35.0, 36.0]],
- [[ 37.0, 38.0, 39.0, 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0, 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0, 52.0, 53.0, 54.0]]],
- [[[ 55.0, 56.0, 57.0, 58.0, 59.0, 60.0],
- [ 61.0, 62.0, 63.0, 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0, 70.0, 71.0, 72.0]],
- [[ 73.0, 74.0, 75.0, 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0, 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0, 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0, 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0, 100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x3xf32>, tensor<2x3x3x6xf32>) -> tensor<2x3x3x6xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[16065.0, 16290.0, 16515.0, 16740.0, 16965.0, 17190.0],
- [18873.0, 19152.0, 19431.0, 19710.0, 19989.0, 20268.0],
- [21681.0, 22014.0, 22347.0, 22680.0, 23013.0, 23346.0]],
- [[30105.0, 30600.0, 31095.0, 31590.0, 32085.0, 32580.0],
- [32913.0, 33462.0, 34011.0, 34560.0, 35109.0, 35658.0],
- [35721.0, 36324.0, 36927.0, 37530.0, 38133.0, 38736.0]],
- [[44145.0, 44910.0, 45675.0, 46440.0, 47205.0, 47970.0],
- [46953.0, 47772.0, 48591.0, 49410.0, 50229.0, 51048.0],
- [49761.0, 50634.0, 51507.0, 52380.0, 53253.0, 54126.0]]],
- [[[72225.0, 73530.0, 74835.0, 76140.0, 77445.0, 78750.0],
- [75033.0, 76392.0, 77751.0, 79110.0, 80469.0, 81828.0],
- [77841.0, 79254.0, 80667.0, 82080.0, 83493.0, 84906.0]],
- [[86265.0, 87840.0, 89415.0, 90990.0, 92565.0, 94140.0],
- [89073.0, 90702.0, 92331.0, 93960.0, 95589.0, 97218.0],
- [91881.0, 93564.0, 95247.0, 96930.0, 98613.0, 100296.0]],
- [[100305.0, 102150.0, 103995.0, 105840.0, 107685.0, 109530.0],
- [103113.0, 105012.0, 106911.0, 108810.0, 110709.0, 112608.0],
- [105921.0, 107874.0, 109827.0, 111780.0, 113733.0, 115686.0]]]]> : tensor<2x3x3x6xf32>) : tensor<2x3x3x6xf32>
- return
-}
diff --git a/iree/test/e2e/xla_ops/BUILD b/iree/test/e2e/xla_ops/BUILD
index 9af6bd1..744ff22 100644
--- a/iree/test/e2e/xla_ops/BUILD
+++ b/iree/test/e2e/xla_ops/BUILD
@@ -62,6 +62,7 @@
# "gather.mlir",
# "gather_concat.mlir",
#
+ "iota.mlir",
"log.mlir",
"maximum.mlir",
"minimum.mlir",
@@ -82,9 +83,7 @@
"tanh.mlir",
"torch_index_select.mlir",
"transpose.mlir",
-
- # TODO(#2022): fails on real devices.
- # "while.mlir",
+ "while.mlir",
],
driver = "vulkan",
target_backend = "vulkan-spirv",
@@ -108,6 +107,7 @@
"divide.mlir",
"dot.mlir",
"exponential.mlir",
+ "iota.mlir",
"log.mlir",
"maximum.mlir",
"minimum.mlir",
diff --git a/iree/test/e2e/xla_ops/CMakeLists.txt b/iree/test/e2e/xla_ops/CMakeLists.txt
index 7f65b06..e4ae959 100644
--- a/iree/test/e2e/xla_ops/CMakeLists.txt
+++ b/iree/test/e2e/xla_ops/CMakeLists.txt
@@ -46,6 +46,7 @@
"divide.mlir"
"dot.mlir"
"exponential.mlir"
+ "iota.mlir"
"log.mlir"
"maximum.mlir"
"minimum.mlir"
@@ -66,6 +67,7 @@
"tanh.mlir"
"torch_index_select.mlir"
"transpose.mlir"
+ "while.mlir"
TARGET_BACKEND
vulkan-spirv
DRIVER
@@ -91,6 +93,7 @@
"divide.mlir"
"dot.mlir"
"exponential.mlir"
+ "iota.mlir"
"log.mlir"
"maximum.mlir"
"minimum.mlir"
diff --git a/iree/test/e2e/xla_ops/convolution.mlir b/iree/test/e2e/xla_ops/convolution.mlir
index 22c5258..6ac1719 100644
--- a/iree/test/e2e/xla_ops/convolution.mlir
+++ b/iree/test/e2e/xla_ops/convolution.mlir
@@ -65,51 +65,47 @@
return
}
-// TODO(#2345): This test seems to fail when executed with another
-// test from this file, but passes as a standalone test. Needs further
-// investigation
-
-// func @conv2d_2451x2311_same() attributes { iree.module.export } {
-// %inputs = iree.unfoldable_constant dense<[
-// [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
-// [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
-// [[11.0], [12.0], [13.0], [14.0], [15.0]],
-// [[16.0], [17.0], [18.0], [19.0], [20.0]]],
-// [[[21.0], [22.0], [23.0], [24.0], [25.0]],
-// [[26.0], [27.0], [28.0], [29.0], [30.0]],
-// [[31.0], [32.0], [33.0], [34.0], [35.0]],
-// [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
-// %weights = iree.unfoldable_constant dense<[
-// [[[1.0]], [[2.0]], [[3.0]]],
-// [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
-// %res = "mhlo.convolution"(%inputs, %weights) {
-// batch_group_count = 1 : i64,
-// dimension_numbers = {
-// input_batch_dimension = 0 : i64,
-// input_feature_dimension = 3 : i64,
-// input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
-// kernel_input_feature_dimension = 2 : i64,
-// kernel_output_feature_dimension = 3 : i64,
-// kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
-// output_batch_dimension = 0 : i64,
-// output_feature_dimension = 3 : i64,
-// output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
-// feature_group_count = 1 : i64,
-// padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
-// rhs_dilation = dense<1> : tensor<2xi64>,
-// window_strides = dense<1> : tensor<2xi64>} :
-// (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
-// check.expect_almost_eq_const(%res, dense<[
-// [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
-// [[160.0], [226.0], [247.0], [268.0], [160.0]],
-// [[240.0], [331.0], [352.0], [373.0], [220.0]],
-// [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
-// [[[400.0], [541.0], [562.0], [583.0], [340.0]],
-// [[480.0], [646.0], [667.0], [688.0], [400.0]],
-// [[560.0], [751.0], [772.0], [793.0], [460.0]],
-// [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
-// return
-// }
+func @conv2d_2451x2311_same() attributes { iree.module.export } {
+ %inputs = iree.unfoldable_constant dense<[
+ [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
+ [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
+ [[11.0], [12.0], [13.0], [14.0], [15.0]],
+ [[16.0], [17.0], [18.0], [19.0], [20.0]]],
+ [[[21.0], [22.0], [23.0], [24.0], [25.0]],
+ [[26.0], [27.0], [28.0], [29.0], [30.0]],
+ [[31.0], [32.0], [33.0], [34.0], [35.0]],
+ [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
+ %weights = iree.unfoldable_constant dense<[
+ [[[1.0]], [[2.0]], [[3.0]]],
+ [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
+ %res = "mhlo.convolution"(%inputs, %weights) {
+ batch_group_count = 1 : i64,
+ dimension_numbers = {
+ input_batch_dimension = 0 : i64,
+ input_feature_dimension = 3 : i64,
+ input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
+ kernel_input_feature_dimension = 2 : i64,
+ kernel_output_feature_dimension = 3 : i64,
+ kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
+ output_batch_dimension = 0 : i64,
+ output_feature_dimension = 3 : i64,
+ output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ feature_group_count = 1 : i64,
+ padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
+ rhs_dilation = dense<1> : tensor<2xi64>,
+ window_strides = dense<1> : tensor<2xi64>} :
+ (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
+ check.expect_almost_eq_const(%res, dense<[
+ [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
+ [[160.0], [226.0], [247.0], [268.0], [160.0]],
+ [[240.0], [331.0], [352.0], [373.0], [220.0]],
+ [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
+ [[[400.0], [541.0], [562.0], [583.0], [340.0]],
+ [[480.0], [646.0], [667.0], [688.0], [400.0]],
+ [[560.0], [751.0], [772.0], [793.0], [460.0]],
+ [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
+ return
+}
func @conv2d_no_padding2() attributes { iree.module.export } {
%inputs = iree.unfoldable_constant dense<[
diff --git a/iree/test/e2e/xla_ops/iota.mlir b/iree/test/e2e/xla_ops/iota.mlir
new file mode 100644
index 0000000..c3ff040
--- /dev/null
+++ b/iree/test/e2e/xla_ops/iota.mlir
@@ -0,0 +1,16 @@
+func @iota_dim0() attributes { iree.module.export } {
+ %result = "mhlo.iota"() {iota_dimension = 0 : i64} : () -> tensor<2x3xf32>
+ check.expect_almost_eq_const(%result, dense<[
+ [0.0, 0.0, 0.0],
+ [1.0, 1.0, 1.0]]> : tensor<2x3xf32>) : tensor<2x3xf32>
+ return
+}
+
+
+func @iota_dim1() attributes { iree.module.export } {
+ %result = "mhlo.iota"() {iota_dimension = 1 : i64} : () -> tensor<2x3xf32>
+ check.expect_almost_eq_const(%result, dense<[
+ [0.0, 1.0, 2.0],
+ [0.0, 1.0, 2.0]]> : tensor<2x3xf32>) : tensor<2x3xf32>
+ return
+}
diff --git a/iree/test/e2e/xla_ops/pad.mlir b/iree/test/e2e/xla_ops/pad.mlir
index 7f6df37..537e684 100644
--- a/iree/test/e2e/xla_ops/pad.mlir
+++ b/iree/test/e2e/xla_ops/pad.mlir
@@ -20,3 +20,19 @@
check.expect_eq(%res, %input) : tensor<2x3xi32>
return
}
+
+func @pad_with_interior_padding() attributes { iree.module.export } {
+ %input = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32>
+ %c0 = iree.unfoldable_constant dense<0> : tensor<i32>
+ %res = "mhlo.pad"(%input, %c0) {
+ edge_padding_low = dense<[0, 1]> : tensor<2xi64>,
+ edge_padding_high = dense<[1, 5]> : tensor<2xi64>,
+ interior_padding = dense<[1, 2]> : tensor<2xi64>
+ } : (tensor<2x3xi32>, tensor<i32>) -> tensor<4x13xi32>
+ check.expect_eq_const(%res, dense<[
+ [0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 4, 0, 0, 5, 0, 0, 6, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<4x13xi32>) : tensor<4x13xi32>
+ return
+}
diff --git a/iree/tools/CMakeLists.txt b/iree/tools/CMakeLists.txt
index 93f0b2f..a277877 100644
--- a/iree/tools/CMakeLists.txt
+++ b/iree/tools/CMakeLists.txt
@@ -24,6 +24,16 @@
"iree::hal::vulkan::vulkan_driver_module"
)
+if(IREE_ENABLE_EMITC)
+ set(IREE_OPT_CONDITIONAL_DEPS
+ MLIREmitC
+ )
+ set(IREE_TRANSLATE_CONDITIONAL_DEPS
+ MLIREmitC
+ MLIRTargetCpp
+ )
+endif()
+
iree_cc_binary(
NAME
iree-benchmark-module
@@ -255,6 +265,7 @@
iree::compiler::Dialect::Vulkan::IR
iree::compiler::Translation::IREEVM
tensorflow::mlir_hlo
+ ${IREE_OPT_CONDITIONAL_DEPS}
PUBLIC
)
@@ -280,6 +291,7 @@
iree::compiler::Dialect::VM::Target::Bytecode
iree::compiler::Dialect::VM::Target::init_targets
iree::compiler::Translation::IREEVM
+ ${IREE_TRANSLATE_CONDITIONAL_DEPS}
PUBLIC
)
diff --git a/iree/tools/init_mlir_passes.h b/iree/tools/init_mlir_passes.h
index 814d844..58dede2 100644
--- a/iree/tools/init_mlir_passes.h
+++ b/iree/tools/init_mlir_passes.h
@@ -33,6 +33,7 @@
#include "mlir/Dialect/Quant/Passes.h"
#include "mlir/Dialect/SCF/Passes.h"
#include "mlir/Dialect/SPIRV/Passes.h"
+#include "mlir/Dialect/Shape/Transforms/Passes.h"
#include "mlir/Transforms/LocationSnapshot.h"
#include "mlir/Transforms/Passes.h"
@@ -136,6 +137,10 @@
quant::createConvertSimulatedQuantPass();
quant::createConvertConstPass();
+ // Shape
+#define GEN_PASS_REGISTRATION
+#include "mlir/Dialect/Shape/Transforms/Passes.h.inc"
+
// SPIR-V
spirv::createLowerABIAttributesPass();
createConvertGPUToSPIRVPass();
diff --git a/iree/tools/opt_main.cc b/iree/tools/opt_main.cc
index 57ebe15..260901c 100644
--- a/iree/tools/opt_main.cc
+++ b/iree/tools/opt_main.cc
@@ -37,6 +37,10 @@
#include "mlir/Support/FileUtilities.h"
#include "mlir/Support/MlirOptMain.h"
+#ifdef IREE_HAVE_EMITC_DIALECT
+#include "emitc/InitDialect.h"
+#endif // IREE_HAVE_EMITC_DIALECT
+
static llvm::cl::opt<std::string> inputFilename(llvm::cl::Positional,
llvm::cl::desc("<input file>"),
llvm::cl::init("-"));
@@ -74,6 +78,9 @@
int main(int argc, char **argv) {
mlir::registerMlirDialects();
mlir::registerMlirPasses();
+#ifdef IREE_HAVE_EMITC_DIALECT
+ mlir::registerEmitCDialect();
+#endif // IREE_HAVE_EMITC_DIALECT
mlir::registerXLADialects();
mlir::iree_compiler::registerIreeDialects();
mlir::iree_compiler::registerIreeCompilerModuleDialects();
diff --git a/iree/tools/run_module_main.cc b/iree/tools/run_module_main.cc
index 96d0584..41baed1 100644
--- a/iree/tools/run_module_main.cc
+++ b/iree/tools/run_module_main.cc
@@ -142,8 +142,8 @@
RETURN_IF_ERROR(PrintVariantList(output_descs, outputs.get()))
<< "printing results";
- iree_vm_list_deinitialize(inputs.get());
- iree_vm_list_deinitialize(outputs.get());
+ inputs.reset();
+ outputs.reset();
iree_vm_module_release(hal_module);
iree_vm_module_release(input_module);
iree_hal_device_release(device);
diff --git a/iree/tools/translate_main.cc b/iree/tools/translate_main.cc
index 97b4334..50edac7 100644
--- a/iree/tools/translate_main.cc
+++ b/iree/tools/translate_main.cc
@@ -38,6 +38,11 @@
#include "mlir/Support/ToolUtilities.h"
#include "mlir/Translation.h"
+#ifdef IREE_HAVE_EMITC_DIALECT
+#include "emitc/InitDialect.h"
+#include "emitc/InitTranslation.h"
+#endif // IREE_HAVE_EMITC_DIALECT
+
static llvm::cl::opt<std::string> inputFilename(llvm::cl::Positional,
llvm::cl::desc("<input file>"),
llvm::cl::init("-"));
@@ -56,12 +61,18 @@
llvm::InitLLVM y(argc, argv);
mlir::registerMlirDialects();
+#ifdef IREE_HAVE_EMITC_DIALECT
+ mlir::registerEmitCDialect();
+#endif // IREE_HAVE_EMITC_DIALECT
mlir::registerXLADialects();
mlir::iree_compiler::registerIreeDialects();
mlir::iree_compiler::registerIreeCompilerModuleDialects();
mlir::iree_compiler::registerHALTargetBackends();
mlir::iree_compiler::registerVMTargets();
mlir::registerMlirTranslations();
+#ifdef IREE_HAVE_EMITC_DIALECT
+ mlir::registerEmitCTranslation();
+#endif // IREE_HAVE_EMITC_DIALECT
mlir::iree_compiler::registerIreeTranslations();
mlir::iree_compiler::registerLinalgToSPIRVPasses();
diff --git a/iree/vm/bytecode_dispatch.c b/iree/vm/bytecode_dispatch.c
index 86b5d1d..2565c5e 100644
--- a/iree/vm/bytecode_dispatch.c
+++ b/iree/vm/bytecode_dispatch.c
@@ -209,7 +209,7 @@
#define DECLARE_DISPATCH_OPC(ordinal, name) &&_dispatch_##name,
#define DECLARE_DISPATCH_RSV(ordinal) &&_dispatch_unhandled,
static const void* kDispatchTable[256] = {
- IREE_VM_OP_TABLE(DECLARE_DISPATCH_OPC, DECLARE_DISPATCH_RSV)};
+ IREE_VM_OP_CORE_TABLE(DECLARE_DISPATCH_OPC, DECLARE_DISPATCH_RSV)};
#define DISPATCH_UNHANDLED() \
_dispatch_unhandled: \
@@ -238,7 +238,7 @@
return IREE_STATUS_UNIMPLEMENTED;
#define DISPATCH_OP(op_name, body) \
- case IREE_VM_OP_##op_name: \
+ case IREE_VM_OP_CORE_##op_name: \
IREE_DISPATCH_LOG_OPCODE(#op_name); \
body; \
break;
@@ -608,7 +608,9 @@
DISPATCH_OP_CAST_I32(TruncI32I8, uint8_t, uint32_t);
DISPATCH_OP_CAST_I32(TruncI32I16, uint16_t, uint32_t);
DISPATCH_OP_CAST_I32(ExtI8I32S, int8_t, int32_t);
+ DISPATCH_OP_CAST_I32(ExtI8I32U, uint8_t, uint32_t);
DISPATCH_OP_CAST_I32(ExtI16I32S, int16_t, int32_t);
+ DISPATCH_OP_CAST_I32(ExtI16I32U, uint16_t, uint32_t);
//===------------------------------------------------------------------===//
// Native bitwise shifts and rotates
@@ -642,12 +644,6 @@
DISPATCH_OP_CMP_I32(CmpNEI32, int32_t, !=);
DISPATCH_OP_CMP_I32(CmpLTI32S, int32_t, <);
DISPATCH_OP_CMP_I32(CmpLTI32U, uint32_t, <);
- DISPATCH_OP_CMP_I32(CmpLTEI32S, int32_t, <=);
- DISPATCH_OP_CMP_I32(CmpLTEI32U, uint32_t, <=);
- DISPATCH_OP_CMP_I32(CmpGTI32S, int32_t, >);
- DISPATCH_OP_CMP_I32(CmpGTI32U, uint32_t, >);
- DISPATCH_OP_CMP_I32(CmpGTEI32S, int32_t, >=);
- DISPATCH_OP_CMP_I32(CmpGTEI32U, uint32_t, >=);
DISPATCH_OP(CmpNZI32, {
int32_t operand = VM_DecOperandRegI32("operand");
int32_t* result = VM_DecResultRegI32("result");
@@ -891,6 +887,16 @@
pc = block_pc;
});
+ //===------------------------------------------------------------------===//
+ // Extension trampolines
+ //===------------------------------------------------------------------===//
+
+ DISPATCH_OP(PrefixExtI64, { return IREE_STATUS_UNIMPLEMENTED; });
+
+ DISPATCH_OP(PrefixExtF32, { return IREE_STATUS_UNIMPLEMENTED; });
+
+ DISPATCH_OP(PrefixExtF64, { return IREE_STATUS_UNIMPLEMENTED; });
+
// NOLINTNEXTLINE(misc-static-assert)
DISPATCH_UNHANDLED();
}
diff --git a/iree/vm/module.h b/iree/vm/module.h
index e278992..3081e10 100644
--- a/iree/vm/module.h
+++ b/iree/vm/module.h
@@ -210,7 +210,7 @@
// attributes.
// Returns IREE_STATUS_NOT_FOUND if index >= the number of attributes for
// the function.
- // See: docs/function_abi.md
+ // See: docs/design_docs/function_abi.md
iree_status_t(IREE_API_PTR* get_function_reflection_attr)(
void* self, iree_vm_function_linkage_t linkage, int32_t ordinal,
int32_t index, iree_string_view_t* key, iree_string_view_t* value);
@@ -277,7 +277,7 @@
// Returns the empty string if the reflection data in general or the specific
// key is not found.
//
-// See: docs/function_abi.md for documentation on the ABI.
+// See: docs/design_docs/function_abi.md for documentation on the ABI.
IREE_API_EXPORT iree_string_view_t IREE_API_CALL
iree_vm_function_reflection_attr(const iree_vm_function_t* function,
iree_string_view_t key);
@@ -289,7 +289,7 @@
// attributes.
// Returns IREE_STATUS_NOT_FOUND if index >= the number of attributes for
// the function.
-// See: docs/function_abi.md
+// See: docs/design_docs/function_abi.md
IREE_API_EXPORT iree_status_t IREE_API_CALL
iree_vm_get_function_reflection_attr(iree_vm_function_t function, int32_t index,
iree_string_view_t* key,
diff --git a/iree/vm/test/BUILD b/iree/vm/test/BUILD
index 5018865..c33c07d 100644
--- a/iree/vm/test/BUILD
+++ b/iree/vm/test/BUILD
@@ -25,6 +25,7 @@
name = "all_bytecode_modules_cc",
srcs = [
":arithmetic_ops.module",
+ ":comparison_ops.module",
":control_flow_ops.module",
":list_ops.module",
],
@@ -41,6 +42,12 @@
)
iree_bytecode_module(
+ name = "comparison_ops",
+ src = "comparison_ops.mlir",
+ flags = ["-iree-vm-ir-to-bytecode-module"],
+)
+
+iree_bytecode_module(
name = "control_flow_ops",
src = "control_flow_ops.mlir",
flags = ["-iree-vm-ir-to-bytecode-module"],
diff --git a/iree/vm/test/CMakeLists.txt b/iree/vm/test/CMakeLists.txt
index cb6951f..747595b 100644
--- a/iree/vm/test/CMakeLists.txt
+++ b/iree/vm/test/CMakeLists.txt
@@ -19,6 +19,7 @@
all_bytecode_modules_cc
GENERATED_SRCS
"arithmetic_ops.module"
+ "comparison_ops.module"
"control_flow_ops.module"
"list_ops.module"
CC_FILE_OUTPUT
@@ -43,6 +44,16 @@
iree_bytecode_module(
NAME
+ comparison_ops
+ SRC
+ "comparison_ops.mlir"
+ FLAGS
+ "-iree-vm-ir-to-bytecode-module"
+ PUBLIC
+)
+
+iree_bytecode_module(
+ NAME
control_flow_ops
SRC
"control_flow_ops.mlir"
diff --git a/iree/vm/test/comparison_ops.mlir b/iree/vm/test/comparison_ops.mlir
new file mode 100644
index 0000000..7161cb6
--- /dev/null
+++ b/iree/vm/test/comparison_ops.mlir
@@ -0,0 +1,172 @@
+vm.module @comparison_ops {
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i32.s
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_s_0
+ vm.func @test_cmp_lt_s_0() {
+ %lhs = vm.const.i32 2 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 -2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0 : i32
+ vm.check.eq %actual, %expected, "2 < -2" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_s_1
+ vm.func @test_cmp_lt_s_1() {
+ %lhs = vm.const.i32 -2 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1 : i32
+ vm.check.eq %actual, %expected, "-2 < 2" : i32
+ vm.return
+ }
+
+ // Expect UINT_MAX to be interpreted as -1 when doing a signed compare.
+ vm.export @test_cmp_lt_s_2
+ vm.func @test_cmp_lt_s_2() {
+ %lhs = vm.const.i32 4294967295 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1 : i32
+ vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.lt.i32.u
+ //===--------------------------------------------------------------------===//
+
+ vm.export @test_cmp_lt_u_0
+ vm.func @test_cmp_lt_u_0() {
+ %lhs = vm.const.i32 2 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 -2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 1 : i32
+ vm.check.eq %actual, %expected, "2 < -2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_1
+ vm.func @test_cmp_lt_u_1() {
+ %lhs = vm.const.i32 -2 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0 : i32
+ vm.check.eq %actual, %expected, "-2 < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ vm.export @test_cmp_lt_u_2
+ vm.func @test_cmp_lt_u_2() {
+ %lhs = vm.const.i32 4294967295 : i32
+ %lhs_dno = iree.do_not_optimize(%lhs) : i32
+ %rhs = vm.const.i32 2 : i32
+ %rhs_dno = iree.do_not_optimize(%rhs) : i32
+ %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
+ %expected = vm.const.i32 0 : i32
+ vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2 (as unsigned)" : i32
+ vm.return
+ }
+
+ //===--------------------------------------------------------------------===//
+ // vm.cmp.*.i32.* pseudo-ops
+ //===--------------------------------------------------------------------===//
+ // NOTE: all of these are turned in to some variants of vm.cmp.lt by the
+ // compiler and are here as a way to test the runtime behavior of the
+ // pseudo-op expansions.
+
+ vm.export @test_cmp_lte
+ vm.func @test_cmp_lte() {
+ %true = vm.const.i32 1 : i32
+ %false = vm.const.i32 0 : i32
+
+ %cn2 = vm.const.i32 -2 : i32
+ %cn2_dno = iree.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2 : i32
+ %c2_dno = iree.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.lte.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
+ %cmp_1 = vm.cmp.lte.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %false, "2 <= -2" : i32
+ %cmp_2 = vm.cmp.lte.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %true, "2 <= 2" : i32
+
+ %cmp_3 = vm.cmp.lte.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %false, "-2 <= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.lte.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %true, "2 <= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.lte.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %true, "2 <= 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gt
+ vm.func @test_cmp_gt() {
+ %true = vm.const.i32 1 : i32
+ %false = vm.const.i32 0 : i32
+
+ %cn2 = vm.const.i32 -2 : i32
+ %cn2_dno = iree.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2 : i32
+ %c2_dno = iree.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.gt.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %false, "-2 > 2" : i32
+ %cmp_1 = vm.cmp.gt.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %true, "2 > -2" : i32
+ %cmp_2 = vm.cmp.gt.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %false, "2 > 2" : i32
+
+ %cmp_3 = vm.cmp.gt.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %true, "-2 > 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gt.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %false, "2 > -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gt.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %false, "2 > 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+ vm.export @test_cmp_gte
+ vm.func @test_cmp_gte() {
+ %true = vm.const.i32 1 : i32
+ %false = vm.const.i32 0 : i32
+
+ %cn2 = vm.const.i32 -2 : i32
+ %cn2_dno = iree.do_not_optimize(%cn2) : i32
+ %c2 = vm.const.i32 2 : i32
+ %c2_dno = iree.do_not_optimize(%c2) : i32
+
+ %cmp_0 = vm.cmp.gte.i32.s %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
+ %cmp_1 = vm.cmp.gte.i32.s %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_1, %true, "2 >= -2" : i32
+ %cmp_2 = vm.cmp.gte.i32.s %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_2, %true, "2 >= 2" : i32
+
+ %cmp_3 = vm.cmp.gte.i32.u %cn2_dno, %c2_dno : i32
+ vm.check.eq %cmp_3, %true, "-2 >= 2 (unsigned)" : i32
+ %cmp_4 = vm.cmp.gte.i32.u %c2_dno, %cn2_dno : i32
+ vm.check.eq %cmp_4, %false, "2 >= -2 (unsigned)" : i32
+ %cmp_5 = vm.cmp.gte.i32.u %c2_dno, %c2_dno : i32
+ vm.check.eq %cmp_5, %true, "2 >= 2 (unsigned)" : i32
+
+ vm.return
+ }
+
+}
diff --git a/kokoro/gcp_ubuntu/bazel/bindings/common.cfg b/kokoro/gcp_ubuntu/bazel/bindings/common.cfg
index d4a4e26..8a49430 100644
--- a/kokoro/gcp_ubuntu/bazel/bindings/common.cfg
+++ b/kokoro/gcp_ubuntu/bazel/bindings/common.cfg
@@ -17,4 +17,4 @@
# Common configuration for Kokoro builds that run the bindings build with bazel
# on linux.
-build_file: "iree/kokoro/gcp_ubuntu/bazel/bindings/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/bindings/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/core/common.cfg b/kokoro/gcp_ubuntu/bazel/core/common.cfg
index b8ccaa7..3a22d10 100755
--- a/kokoro/gcp_ubuntu/bazel/core/common.cfg
+++ b/kokoro/gcp_ubuntu/bazel/core/common.cfg
@@ -17,4 +17,4 @@
# Common configuration for Kokoro builds that run the core build with bazel on
# linux.
-build_file: "iree/kokoro/gcp_ubuntu/bazel/core/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/core/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/bazel/integrations/common.cfg b/kokoro/gcp_ubuntu/bazel/integrations/common.cfg
index 9331508..eb31e55 100644
--- a/kokoro/gcp_ubuntu/bazel/integrations/common.cfg
+++ b/kokoro/gcp_ubuntu/bazel/integrations/common.cfg
@@ -17,4 +17,4 @@
# Common configuration for Kokoro builds that run the integrations build with
# bazel on linux.
-build_file: "iree/kokoro/gcp_ubuntu/bazel/integrations/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/bazel/linux/x86/integrations/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg b/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
index 8159f36..1376e08 100644
--- a/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
+++ b/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/common.cfg
@@ -17,4 +17,4 @@
# Common configuration for Kokoro builds that cross-compile IREE towards
# Android arm64-v8a using CMake.
-build_file: "iree/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/android/arm64-v8a/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/common.cfg b/kokoro/gcp_ubuntu/cmake/common.cfg
index 838f4a5..49e6865 100644
--- a/kokoro/gcp_ubuntu/cmake/common.cfg
+++ b/kokoro/gcp_ubuntu/cmake/common.cfg
@@ -16,4 +16,4 @@
# Common configuration for Kokoro builds that run cmake on linux.
-build_file: "iree/kokoro/gcp_ubuntu/cmake/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86/build_kokoro.sh"
diff --git a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg b/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
index 9e6847d..bdb9163 100644
--- a/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
+++ b/kokoro/gcp_ubuntu/cmake/linux/x86-turing/common.cfg
@@ -16,4 +16,4 @@
# Common configuration for Kokoro builds that run cmake on linux.
-build_file: "iree/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build_kokoro.sh"
diff --git a/packaging/python/common_setup.py b/packaging/python/common_setup.py
index cbaf1d1..149dbdf 100644
--- a/packaging/python/common_setup.py
+++ b/packaging/python/common_setup.py
@@ -16,6 +16,7 @@
import platform
import setuptools
import sys
+import sysconfig
from datetime import date
@@ -99,15 +100,6 @@
}
-def get_native_file_extension():
- if platform.system() == "Windows":
- return "pyd"
- elif platform.system() == "Darwin":
- return "dylib"
- else:
- return "so"
-
-
def setup(**kwargs):
# See: https://stackoverflow.com/q/45150304
try:
@@ -128,7 +120,7 @@
# Unfortunately, bazel is imprecise and scatters .so files around, so
# need to be specific.
package_data = {
- "": ["*.%s" % (get_native_file_extension(),)],
+ "": ["*%s" % (sysconfig.get_config_var("EXT_SUFFIX"),)],
}
setuptools.setup(
package_data=package_data,
diff --git a/packaging/python/setup_compiler.py b/packaging/python/setup_compiler.py
index c33d7fd..ee9e87d 100644
--- a/packaging/python/setup_compiler.py
+++ b/packaging/python/setup_compiler.py
@@ -39,7 +39,12 @@
print("Found packages:", packages)
setup_kwargs = common_setup.get_setup_defaults(
sub_project="compiler", description="IREE Generic Compiler")
- common_setup.setup(packages=packages, **setup_kwargs)
+ common_setup.setup(
+ packages=packages,
+ ext_modules=[
+ setuptools.Extension(name="pyiree.compiler.binding", sources=[]),
+ ],
+ **setup_kwargs)
if __name__ == "__main__":
diff --git a/packaging/python/setup_rt.py b/packaging/python/setup_rt.py
index ef39248..af30fd6 100644
--- a/packaging/python/setup_rt.py
+++ b/packaging/python/setup_rt.py
@@ -35,7 +35,12 @@
setup_kwargs = common_setup.get_setup_defaults(
sub_project="rt",
description="IREE Runtime Components (for executing compiled programs)")
- common_setup.setup(packages=packages, **setup_kwargs)
+ common_setup.setup(
+ packages=packages,
+ ext_modules=[
+ setuptools.Extension(name="pyiree.rt.binding", sources=[]),
+ ],
+ **setup_kwargs)
if __name__ == "__main__":
diff --git a/packaging/python/setup_tf.py b/packaging/python/setup_tf.py
index 6f97d70..9eccf4f 100644
--- a/packaging/python/setup_tf.py
+++ b/packaging/python/setup_tf.py
@@ -45,7 +45,12 @@
sub_project="tf",
description="IREE TensorFlow Compiler",
package_dir=package_dir)
- common_setup.setup(packages=packages, **setup_kwargs)
+ common_setup.setup(
+ packages=packages,
+ ext_modules=[
+ setuptools.Extension(name="pyiree.tf.compiler.binding", sources=[]),
+ ],
+ **setup_kwargs)
if __name__ == "__main__":
diff --git a/scripts/prepare_doc_publication.py b/scripts/prepare_doc_publication.py
index a4bbad9..cc396dc 100755
--- a/scripts/prepare_doc_publication.py
+++ b/scripts/prepare_doc_publication.py
@@ -59,12 +59,8 @@
'getting_started_android_cmake.md': 'Android with CMake',
'generic_vulkan_env_setup.md': 'Generic Vulkan Setup',
'getting_started_python.md': 'Python',
- 'cmake_options_and_variables.md': 'CMake Options and Variables',
- 'op_coverage.md': 'XLA HLO Operation Coverage',
- 'e2e_coverage.md': 'TensorFlow E2E Coverage',
- 'roadmap.md': 'Short-term Focus Areas',
- 'roadmap_design.md': 'Long-term Design Roadmap',
- 'iree_community.md': 'Community',
+ 'milestones.md': 'Short-term Focus Areas',
+ 'design_roadmap.md': 'Long-term Design Roadmap',
}
# A dictionary containing source file to permanent link mappings.
@@ -75,25 +71,6 @@
# allows one to override the permanent link if necessary.
PERMALINK_DICT = {
'index.md': '/',
- 'getting_started_linux_bazel.md': 'GetStarted/LinuxBazel',
- 'getting_started_linux_cmake.md': 'GetStarted/LinuxCMake',
- 'getting_started_linux_vulkan.md': 'GetStarted/LinuxVulkan',
- 'getting_started_windows_bazel.md': 'GetStarted/WindowsBazel',
- 'getting_started_windows_cmake.md': 'GetStarted/WindowsCMake',
- 'getting_started_windows_vulkan.md': 'GetStarted/WindowsVulkan',
- 'getting_started_macos_cmake.md': 'GetStarted/macOSCMake',
- 'getting_started_macos_vulkan.md': 'GetStarted/macOSVulkan',
- 'getting_started_android_cmake.md': 'GetStarted/AndroidCMake',
- 'generic_vulkan_env_setup.md': 'GetStarted/GenericVulkanSetup',
- 'getting_started_python.md': 'GetStarted/Python',
- 'cmake_options_and_variables.md': 'GetStarted/CMakeOptionsVariables',
- 'developer_overview.md': 'DeveloperOverview',
- 'testing_guide.md': 'TestingGuide',
- 'op_coverage.md': 'HLOOpCoverage',
- 'e2e_coverage.md': 'TensorFlowE2ECoverage',
- 'roadmap.md': 'FocusAreas',
- 'roadmap_design.md': 'DesignRoadmap',
- 'iree_community.md': 'Community',
}
# A dictionary containing source file to navigation order mappings.
@@ -102,15 +79,18 @@
# the left panel of https://google.github.io/iree website. This allows one
# to specify an order for a specific doc.
NAVI_ORDER_DICT = {
+ # Top level entries
'index.md': 1,
- # 'Getting Started' is 2.
- 'developer_overview.md': 3,
- 'roadmap_design.md': 4,
- 'roadmap.md': 5,
- 'op_coverage.md': 6,
- 'e2e_coverage.md': 7,
- 'testing_guide.md': 8,
+ # 'Using IREE' is 2.
+ # 'Getting Started' is 3.
+ # 'Developing IREE' is 4.
+ 'design_roadmap.md': 5,
+ 'milestones.md': 6,
+ 'xla_op_coverage.md': 7,
+ 'tf_e2e_coverage.md': 8,
'iree_community.md': 9,
+ # 'Design Docs' is 10.
+ # 'Dialect Definitions' is 11.
# Within 'Getting Started' use explicit ordering.
# Alphabetical would put 'bazel' before 'cmake' and 'python' between 'linux'
@@ -127,6 +107,16 @@
'getting_started_python.md': 10,
'generic_vulkan_env_setup.md': 11,
'cmake_options_and_variables.md': 12,
+
+ # Within 'Developing IREE' use explicit ordering.
+ 'developer_overview.md': 1,
+ 'contributor_tips.md': 2,
+ 'testing_guide.md': 3,
+ 'benchmarking.md': 4,
+ 'repository_management.md': 5,
+
+ # Within 'Using IREE' use explicit ordering.
+ 'using_colab.md': 1,
}
# A dictionary containing source directory to section tile mappings.
@@ -137,14 +127,17 @@
# Note that the title here must match with index.md file's title under the
# subdirectory.
DIRECTORY_TITLE_DICT = {
+ 'design_docs': 'Design Docs',
+ 'developing_iree': 'Developing IREE',
'Dialects': 'Dialect Definitions',
- 'GetStarted': 'Getting Started',
+ 'get_started': 'Getting Started',
+ 'using_iree': 'Using IREE',
}
# A dictionary containing the supporting JavaScript files for each doc.
JS_FILES_DICT = {
- 'op_coverage.md': ['js/add_classes.js'],
- 'e2e_coverage.md': ['js/add_classes.js'],
+ 'xla_op_coverage.md': ['js/add_classes.js'],
+ 'tf_e2e_coverage.md': ['js/add_classes.js'],
}
@@ -164,19 +157,20 @@
# Use the default layout for everything.
front_matter['layout'] = 'default'
# Use the base filename as permanent link.
- front_matter['permalink'] = base_name
+ # Replace '_' with '-'. Underscores are not typical in URLs...
+ front_matter['permalink'] = base_name.replace('_', '-')
# Organize each doc to a section matching its directory structure.
if relpath and relpath != '.':
- front_matter['parent'] = relpath
- front_matter['permalink'] = f'{relpath}/{front_matter["permalink"]}'
+ hyphen_relpath = relpath.replace('_', '-')
+ front_matter['permalink'] = f'{hyphen_relpath}/{front_matter["permalink"]}'
# Find the title and TOC.
lines = content.splitlines()
title_line_index = None
toc_index = None
for (index, line) in enumerate(lines):
- if line.startswith('# '):
+ if line.startswith('# ') and title_line_index is None:
title_line_index = index
if line == '[TOC]':
toc_index = index
diff --git a/scripts/update_e2e_coverage.py b/scripts/update_e2e_coverage.py
index ea691e4..e9cf397 100755
--- a/scripts/update_e2e_coverage.py
+++ b/scripts/update_e2e_coverage.py
@@ -64,6 +64,7 @@
- vulkan-spirv
The table shows the supported TensorFlow functions and models on each backend.
+It is auto-generated from IREE's test status.
"""
@@ -172,7 +173,7 @@
content.append(generate_table(test_suite))
content = '\n\n'.join(content) + '\n' # Trailing newline.
- table_path = os.path.join(args.build_dir, 'doc', 'e2e_coverage.md')
+ table_path = os.path.join(args.build_dir, 'doc', 'tf_e2e_coverage.md')
with open(table_path, 'w', encoding='utf-8') as f:
f.write(E2E_COVERAGE_DESCRIPTION)
f.write(content)
diff --git a/scripts/update_op_coverage.py b/scripts/update_op_coverage.py
index b01b687..c2c6dd9 100755
--- a/scripts/update_op_coverage.py
+++ b/scripts/update_op_coverage.py
@@ -30,14 +30,15 @@
E2E_XLA_OPS_PATH = 'iree/test/e2e/xla_ops'
# TODO(scotttodd): LLVM AOT (dylib-llvm-aot) HAL target(s)
-OP_COVERAGE_DESCRIPTION = """# HLO Op Coverage
+OP_COVERAGE_DESCRIPTION = """# XLA HLO Op Coverage
There are three backend [targets](https://github.com/google/iree/tree/main/iree/compiler/Dialect/HAL/Target) in IREE:
- vmla
- llvm-ir
- vulkan-spirv
-The table shows the supported XLA HLO ops on each backend.
+The table shows the supported XLA HLO ops on each backend. It is auto-generated
+from IREE's test status.
"""
@@ -117,7 +118,7 @@
if __name__ == '__main__':
args = parse_arguments()
content = generate_table(args.build_dir)
- table_path = os.path.join(args.build_dir, 'doc', 'op_coverage.md')
+ table_path = os.path.join(args.build_dir, 'doc', 'xla_op_coverage.md')
with open(table_path, 'w', encoding='utf-8') as f:
f.write(OP_COVERAGE_DESCRIPTION)
f.write(content)
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 7ca9b58..eed3331 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 7ca9b589c45302feb28c0b3b0e80088c0901bb40
+Subproject commit eed333149d178b69fdaf39b9419b7ca032520182
diff --git a/third_party/mlir-emitc b/third_party/mlir-emitc
index 67f3cce..80885f8 160000
--- a/third_party/mlir-emitc
+++ b/third_party/mlir-emitc
@@ -1 +1 @@
-Subproject commit 67f3ccebee84f3488b46a8d3ac005178c52ff264
+Subproject commit 80885f899e12d55a45561ef758eea47bb340dbf1
diff --git a/third_party/sdl2 b/third_party/sdl2
index b73f111..a1390ed 160000
--- a/third_party/sdl2
+++ b/third_party/sdl2
@@ -1 +1 @@
-Subproject commit b73f111094da3e380a1774b56b15f16c90ae8e23
+Subproject commit a1390ed39ec77ecfb574bc6fcd5bfc5e3adbdea9
diff --git a/third_party/tensorflow b/third_party/tensorflow
index e36aca0..e29e1f4 160000
--- a/third_party/tensorflow
+++ b/third_party/tensorflow
@@ -1 +1 @@
-Subproject commit e36aca0132fbcde0bc820d56185e3078f97a879d
+Subproject commit e29e1f4e574caab071e93cfb91fa9ee0944cd87c