Merge pull request #6114 from rsuderman:main-to-google
PiperOrigin-RevId: 377579548
diff --git a/build_tools/android/run_benchmarks.py b/build_tools/android/run_benchmarks.py
index df083d3..53caac1 100755
--- a/build_tools/android/run_benchmarks.py
+++ b/build_tools/android/run_benchmarks.py
@@ -32,11 +32,11 @@
BENCHMARK_SUITE_REL_PATH = "benchmark_suites"
# Relative path against root benchmark suite directory.
TENSORFLOW_MODEL_SUITE_REL_PATH = "TensorFlow"
+# Relative path against TensorFlow directory.
+VMFB_REL_PATH = "vmfb"
# The flagfile's filename for compiled Python models.
MODEL_FLAGFILE_NAME = "flagfile"
-# The artifact's filename for compiled Python models.
-MODEL_VMFB_NAME = "compiled.vmfb"
# Root directory to perform benchmarks in on the Android device.
ANDROID_TMP_DIR = "/data/local/tmp/iree-benchmarks"
@@ -209,14 +209,17 @@
Returns:
- A list containing (BenchmarkInfo, context, results) tuples.
"""
- # Push the benchmark tool to the Android device first.
+ model_root_dir = os.path.join(root_build_dir, BENCHMARK_SUITE_REL_PATH,
+ TENSORFLOW_MODEL_SUITE_REL_PATH)
+
+ # Push the benchmark vmfb and tool files to the Android device first.
+ adb_push_to_tmp_dir(os.path.join(model_root_dir, VMFB_REL_PATH),
+ relative_dir="",
+ verbose=verbose)
android_tool_path = adb_push_to_tmp_dir(benchmark_tool,
relative_dir="tools",
verbose=verbose)
- model_root_dir = os.path.join(root_build_dir, BENCHMARK_SUITE_REL_PATH,
- TENSORFLOW_MODEL_SUITE_REL_PATH)
-
results = []
# Push all model artifacts to the device and run them.
@@ -225,9 +228,6 @@
model_benchmark_dir)
print(f"--> benchmark: {benchmark_info} <--")
android_relative_dir = os.path.relpath(model_benchmark_dir, model_root_dir)
- adb_push_to_tmp_dir(os.path.join(model_benchmark_dir, MODEL_VMFB_NAME),
- android_relative_dir,
- verbose=verbose)
android_flagfile_path = adb_push_to_tmp_dir(os.path.join(
model_benchmark_dir, MODEL_FLAGFILE_NAME),
android_relative_dir,
diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
index ac03097..f905453 100644
--- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake
+++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
@@ -98,6 +98,7 @@
# discovering them and execute them on devices.
list(GET _RULE_MODULE_SOURCES ${_INDEX} _MODULE_SOURCE)
set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_MODULE_SOURCE}")
+ set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb")
list(GET _RULE_MODULE_NAMES ${_INDEX} _MODULE_NAME)
list(GET _RULE_MODULE_TAGS ${_INDEX} _MODULE_TAGS)
@@ -152,27 +153,8 @@
foreach (_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES)
set(_BENCHMARK_DIR_NAME
"iree-${_RULE_DRIVER}__${_RULE_TARGET_ARCHITECTURE}__${_BENCHMARK_MODE}")
- set(_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/${_MODULE_DIR_NAME}/${_BENCHMARK_DIR_NAME}")
- set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module")
- list(APPEND _TRANSLATION_ARGS "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}")
- list(APPEND _TRANSLATION_ARGS ${_RULE_TRANSLATION_FLAGS})
-
- set(_VMFB_FILE "${_ARTIFACTS_DIR}/compiled.vmfb")
- add_custom_command(
- OUTPUT "${_VMFB_FILE}"
- COMMAND
- "$<TARGET_FILE:iree_tools_iree-translate>"
- ${_TRANSLATION_ARGS}
- "${_SOURCE_FILE}"
- -o "${_VMFB_FILE}"
- WORKING_DIRECTORY "${_ARTIFACTS_DIR}"
- DEPENDS
- iree_tools_iree-translate
- "${_DOWNLOAD_TARGET_NAME}"
- COMMENT "Generating ${_VMFB_FILE}"
- )
-
+ # A list of name segments for composing unique CMake target names.
set(_COMMON_NAME_SEGMENTS "${_MODULE_NAME}")
string(REPLACE "," "-" _TAGS "${_MODULE_TAGS}")
string(REPLACE "," "-" _MODE "${_BENCHMARK_MODE}")
@@ -180,28 +162,62 @@
"${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}"
"${_RULE_TARGET_ARCHITECTURE}")
- # Construct the benchmark artifact generation target name, which is the module
- # name, followed by benchmark mode, target backend, and configuration.
- set(_TRANSLATION_TARGET_NAME_LIST "iree-generate-benchmark-artifact")
- list(APPEND _TRANSLATION_TARGET_NAME_LIST ${_COMMON_NAME_SEGMENTS})
- list(JOIN _TRANSLATION_TARGET_NAME_LIST "__" _TRANSLATION_TARGET_NAME)
+ # The full list of translation flags.
+ set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module")
+ list(APPEND _TRANSLATION_ARGS "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}")
+ list(SORT _RULE_TRANSLATION_FLAGS)
+ list(APPEND _TRANSLATION_ARGS ${_RULE_TRANSLATION_FLAGS})
- add_custom_target("${_TRANSLATION_TARGET_NAME}"
- DEPENDS "${_VMFB_FILE}"
+ # Get a unique identifier for this IREE module file by hashing the command
+ # line flags and input file. We will also use this for the CMake target.
+ string(SHA1 _VMFB_HASH "${_TRANSLATION_ARGS};${_SOURCE_FILE}")
+
+ set(_TRANSLATION_TARGET_NAME "iree-generate-benchmark-artifact-${_VMFB_HASH}")
+
+ # Register the target once and share across all benchmarks having the same
+ # MLIR source and translation flags.
+ if(NOT TARGET "${_TRANSLATION_TARGET_NAME}")
+ set(_VMFB_FILE "${_VMFB_ARTIFACTS_DIR}/compiled-${_VMFB_HASH}.vmfb")
+ add_custom_command(
+ OUTPUT "${_VMFB_FILE}"
+ COMMAND
+ "$<TARGET_FILE:iree_tools_iree-translate>"
+ ${_TRANSLATION_ARGS}
+ "${_SOURCE_FILE}"
+ -o "${_VMFB_FILE}"
+ WORKING_DIRECTORY "${_VMFB_ARTIFACTS_DIR}"
+ DEPENDS
+ iree_tools_iree-translate
+ "${_DOWNLOAD_TARGET_NAME}"
+ COMMENT "Generating VMFB for ${_COMMON_NAME_SEGMENTS}"
+ )
+
+ add_custom_target("${_TRANSLATION_TARGET_NAME}"
+ DEPENDS "${_VMFB_FILE}"
+ )
+
+ # Mark dependency so that we have one target to drive them all.
+ add_dependencies(iree-benchmark-suites "${_TRANSLATION_TARGET_NAME}")
+ endif(NOT TARGET "${_TRANSLATION_TARGET_NAME}")
+
+ # Add a friendly target alias for this particular benchmark.
+ set(_FRIENDLY_TARGET_NAME_LIST "iree-generate-benchmark-artifact")
+ list(APPEND _FRIENDLY_TARGET_NAME_LIST ${_COMMON_NAME_SEGMENTS})
+ list(JOIN _FRIENDLY_TARGET_NAME_LIST "__" _FRIENDLY_TARGET_NAME)
+ add_custom_target("${_FRIENDLY_TARGET_NAME}"
+ DEPENDS "${_TRANSLATION_TARGET_NAME}"
)
- # Mark dependency so that we have one target to drive them all.
- add_dependencies(iree-benchmark-suites "${_TRANSLATION_TARGET_NAME}")
-
# Finally create the command and target for the flagfile used to execute the
# generated artifacts.
- set(_FLAG_FILE "${_ARTIFACTS_DIR}/flagfile")
+ set(_FLAGFILE_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/${_MODULE_DIR_NAME}/${_BENCHMARK_DIR_NAME}")
+ set(_FLAG_FILE "${_FLAGFILE_ARTIFACTS_DIR}/flagfile")
set(_ADDITIONAL_ARGS_CL "--additional_args=\"${_RULE_RUNTIME_FLAGS}\"")
add_custom_command(
OUTPUT "${_FLAG_FILE}"
COMMAND
"${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/generate_flagfile.py"
- --module_file=compiled.vmfb
+ --module_file="../../vmfb/compiled-${_VMFB_HASH}.vmfb"
--driver=${_RULE_DRIVER}
--entry_function=${_ENTRY_FUNCTION}
--function_inputs=${_FUNCTION_INPUTS}
@@ -209,7 +225,7 @@
-o "${_FLAG_FILE}"
DEPENDS
"${IREE_ROOT_DIR}/scripts/generate_flagfile.py"
- WORKING_DIRECTORY "${_ARTIFACTS_DIR}"
+ WORKING_DIRECTORY "${_FLAGFILE_ARTIFACTS_DIR}"
COMMENT "Generating ${_FLAG_FILE}"
)
diff --git a/integrations/tensorflow/e2e/keras/applications/BUILD b/integrations/tensorflow/e2e/keras/applications/BUILD
index 35373f9..cd360b9 100644
--- a/integrations/tensorflow/e2e/keras/applications/BUILD
+++ b/integrations/tensorflow/e2e/keras/applications/BUILD
@@ -108,7 +108,10 @@
"iree_llvmaot",
"iree_vulkan",
],
- "model": "VGG19",
+ "model": [
+ "VGG16",
+ "VGG19",
+ ],
},
],
matrix = {
diff --git a/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt b/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
index 8b0ef04..448e0e4 100644
--- a/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
+++ b/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
@@ -27,8 +27,11 @@
"MobileNet;MobileNetV2;ResNet50;VGG16;VGG19"
"tf;tflite;iree_llvmaot;iree_vulkan"
FAILING_CONFIGURATIONS
+ ",,,VGG16,tflite"
",,,VGG19,tflite"
+ ",,,VGG16,iree_llvmaot"
",,,VGG19,iree_llvmaot"
+ ",,,VGG16,iree_vulkan"
",,,VGG19,iree_vulkan"
LABELS
"manual"
diff --git a/integrations/tensorflow/iree_tf_compiler/BUILD b/integrations/tensorflow/iree_tf_compiler/BUILD
index 18fdf5b..793693e 100644
--- a/integrations/tensorflow/iree_tf_compiler/BUILD
+++ b/integrations/tensorflow/iree_tf_compiler/BUILD
@@ -103,8 +103,11 @@
"//iree_tf_compiler/MHLO",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
+ "@llvm-project//mlir:Parser",
"@llvm-project//mlir:Pass",
+ "@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
+ "@org_tensorflow//tensorflow/compiler/mlir/hlo:hlo_dialect_registration",
"@org_tensorflow//tensorflow/compiler/mlir/xla:hlo_to_mlir_hlo",
"@org_tensorflow//tensorflow/compiler/xla/service:hlo_parser",
"@org_tensorflow//tensorflow/compiler/xla/service:hlo_proto_cc",
diff --git a/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD b/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
index 81092f6..986b490 100644
--- a/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
+++ b/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
@@ -42,6 +42,8 @@
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
+ "@llvm-project//mlir:SCFToStandard",
+ "@llvm-project//mlir:SCFTransforms",
"@llvm-project//mlir:Shape",
"@llvm-project//mlir:ShapeTransforms",
"@llvm-project//mlir:StandardOps",
diff --git a/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp b/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
index 4a71f1f..2719f23 100644
--- a/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
@@ -19,6 +19,8 @@
#include "iree/compiler/Dialect/Shape/Conversion/Passes.h"
#include "iree/compiler/Dialect/Shape/Transforms/Passes.h"
#include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Dialect/SCF/Passes.h"
#include "mlir/Dialect/Shape/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"
@@ -42,7 +44,9 @@
pm.addPass(mlir::createInlinerPass());
pm.addNestedPass<FuncOp>(mhlo::createControlFlowToScfPass());
pm.addNestedPass<FuncOp>(mhlo::createLegalizeControlFlowPass());
+ pm.addNestedPass<FuncOp>(mlir::createLowerToCFGPass());
pm.addPass(createFlattenTuplesInCFGPass());
+ pm.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
// Mostly delicate to the IREE side MHLO legalization pipeline, now that
// we have handled the weird that comes from legacy HLO clients.
diff --git a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
index 07562db..dbff7f9 100644
--- a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
@@ -15,12 +15,15 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "mlir-hlo/Dialect/mhlo/IR/register.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/SymbolTable.h"
+#include "mlir/Parser.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/FileUtilities.h"
#include "tensorflow/compiler/mlir/xla/hlo_to_mlir_hlo.h"
@@ -37,6 +40,7 @@
binary_proto,
text_proto,
hlo_text,
+ mlir_text,
};
// Error collector that prints errors.
@@ -103,69 +107,40 @@
llvm::cl::init(""));
static llvm::cl::opt<XlaFormat> inputFormat(
"xla-format", cl::desc("XLA Format"),
- cl::values(clEnumVal(binary_proto, "Parse a binary protocol buffer"),
- clEnumVal(text_proto, "Parse a text protocol buffer"),
- clEnumVal(hlo_text,
- "Parse an HLO module in its native text format")));
+ cl::values(
+ clEnumVal(binary_proto, "Parse a binary protocol buffer"),
+ clEnumVal(text_proto, "Parse a text protocol buffer"),
+ clEnumVal(hlo_text, "Parse an HLO module in its native text format"),
+ clEnumVal(mlir_text, "Parse MLIR text containing MHLO ops")));
// Register any command line options.
registerAsmPrinterCLOptions();
registerMLIRContextCLOptions();
+ registerPassManagerCLOptions();
registerDefaultTimingManagerCLOptions();
cl::ParseCommandLineOptions(argc, argv);
+ auto openInputStream =
+ [&]() -> llvm::Optional<
+ std::pair<std::istream *, std::unique_ptr<std::ifstream>>> {
+ auto fileInputStream = std::make_unique<std::ifstream>();
+ std::istream *inputStream;
+ if (inputPath == "-") {
+ inputStream = &std::cin;
+ } else {
+ fileInputStream->open(inputPath, std::ios::in | std::ios::binary);
+ if (!fileInputStream->is_open()) {
+ llvm::errs() << "Unable to open input file " << inputPath << "\n";
+ return llvm::None;
+ }
+ inputStream = fileInputStream.get();
+ }
+ return std::make_pair(inputStream, std::move(fileInputStream));
+ };
+
DialectRegistry registry;
-
- // Read the protocol buffer.
- std::ifstream fileInputStream;
- std::istream *inputStream;
- if (inputPath == "-") {
- inputStream = &std::cin;
- } else {
- fileInputStream.open(inputPath, std::ios::in | std::ios::binary);
- if (!fileInputStream.is_open()) {
- llvm::errs() << "Unable to open input file " << inputPath << "\n";
- return 1;
- }
- inputStream = &fileInputStream;
- }
-
- xla::HloProto hloProto;
- switch (inputFormat) {
- case binary_proto: {
- if (!hloProto.mutable_hlo_module()->ParseFromIstream(inputStream)) {
- llvm::errs() << "Could not parse binary protocol buffer from "
- << inputPath << "\n";
- return 1;
- }
- break;
- }
- case text_proto: {
- tensorflow::protobuf::TextFormat::Parser parser;
- PrintErrorCollector collector(inputPath);
- IStreamCopyingInputStream copyingStream(inputStream);
- tensorflow::protobuf::io::CopyingInputStreamAdaptor streamAdaptor(
- ©ingStream);
- parser.RecordErrorsTo(&collector);
- parser.Parse(&streamAdaptor, hloProto.mutable_hlo_module());
- if (collector.hadError) {
- llvm::errs() << "Unable to parse text format protocol buffer\n";
- return 1;
- }
- break;
- }
- case hlo_text: {
- if (failed(ReadHloTextFormatFromStream(inputStream,
- hloProto.mutable_hlo_module()))) {
- return 1;
- }
- break;
- }
- default:
- llvm_unreachable("illegal XlaFormat");
- }
-
- // Convert the Module proto into MLIR.
+ mlir::mhlo::registerAllMhloDialects(registry);
+ registry.insert<mlir::StandardOpsDialect>();
MLIRContext context;
OwningModuleRef module = ModuleOp::create(mlir::UnknownLoc::get(&context));
context.appendDialectRegistry(registry);
@@ -174,12 +149,79 @@
llvm::SourceMgr sourceMgr;
mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context);
- auto status =
- ConvertHloToMlirHlo(module.get(), hloProto.mutable_hlo_module());
- if (!status.ok()) {
- llvm::errs() << "Error converting HLO Module Proto to MLIR: "
- << status.ToString() << "\n";
- return 2;
+ auto loadHloProtoIntoModule = [&](xla::HloProto &hloProto) -> LogicalResult {
+ auto status =
+ ConvertHloToMlirHlo(module.get(), hloProto.mutable_hlo_module());
+ if (!status.ok()) {
+ llvm::errs() << "Error converting HLO Module Proto to MLIR: "
+ << status.ToString() << "\n";
+ return failure();
+ }
+ return success();
+ };
+
+ switch (inputFormat) {
+ case binary_proto: {
+ xla::HloProto hloProto;
+ auto input = openInputStream();
+ if (!input) {
+ return 1;
+ }
+ if (!hloProto.mutable_hlo_module()->ParseFromIstream(input->first)) {
+ llvm::errs() << "Could not parse binary protocol buffer from "
+ << inputPath << "\n";
+ return 1;
+ }
+ if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+ break;
+ }
+ case text_proto: {
+ xla::HloProto hloProto;
+ auto input = openInputStream();
+ if (!input) {
+ return 1;
+ }
+ tensorflow::protobuf::TextFormat::Parser parser;
+ PrintErrorCollector collector(inputPath);
+ IStreamCopyingInputStream copyingStream(input->first);
+ tensorflow::protobuf::io::CopyingInputStreamAdaptor streamAdaptor(
+ ©ingStream);
+ parser.RecordErrorsTo(&collector);
+ parser.Parse(&streamAdaptor, hloProto.mutable_hlo_module());
+ if (collector.hadError) {
+ llvm::errs() << "Unable to parse text format protocol buffer\n";
+ return 1;
+ }
+ if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+ break;
+ }
+ case hlo_text: {
+ xla::HloProto hloProto;
+ auto input = openInputStream();
+ if (!input) {
+ return 1;
+ }
+ if (failed(ReadHloTextFormatFromStream(input->first,
+ hloProto.mutable_hlo_module()))) {
+ return 1;
+ }
+ if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+ break;
+ }
+ case mlir_text: {
+ std::string errorMessage;
+ auto file = openInputFile(inputPath, &errorMessage);
+ if (!file) {
+ llvm::errs() << errorMessage << "\n";
+ return 1;
+ }
+ sourceMgr.AddNewSourceBuffer(std::move(file), SMLoc());
+ module = parseSourceFile(sourceMgr, &context);
+ if (!module) return 2;
+ break;
+ }
+ default:
+ llvm_unreachable("illegal XlaFormat");
}
// Find the entry function and annotate it as exported.
diff --git a/iree/compiler/Conversion/LinalgToLLVM/BUILD b/iree/compiler/Conversion/LinalgToLLVM/BUILD
index 18241c8..1fe6525 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/BUILD
+++ b/iree/compiler/Conversion/LinalgToLLVM/BUILD
@@ -15,7 +15,6 @@
srcs = [
"ConvertToLLVM.cpp",
"KernelDispatch.cpp",
- "LLVMCodeGenOptions.cpp",
"LinalgTileAndVectorizePass.cpp",
"LinalgVectorizePass.cpp",
"LowerExecutableTargetPass.cpp",
@@ -26,7 +25,6 @@
],
hdrs = [
"KernelDispatch.h",
- "LLVMCodeGenOptions.h",
"Passes.h",
],
deps = [
diff --git a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
index 2fdfd52..5a8318e 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -15,12 +15,10 @@
LinalgToLLVM
HDRS
"KernelDispatch.h"
- "LLVMCodeGenOptions.h"
"Passes.h"
SRCS
"ConvertToLLVM.cpp"
"KernelDispatch.cpp"
- "LLVMCodeGenOptions.cpp"
"LinalgTileAndVectorizePass.cpp"
"LinalgVectorizePass.cpp"
"LowerExecutableTargetPass.cpp"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
index 3a4645c..b7354c1 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
@@ -5,7 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h"
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
@@ -594,8 +593,10 @@
class ConvertToLLVMPass
: public PassWrapper<ConvertToLLVMPass, OperationPass<ModuleOp>> {
public:
- ConvertToLLVMPass(LLVMCodegenOptions options) : options_(options) {}
-
+ ConvertToLLVMPass(bool unfuseFMA = false) { unfuseFMAOps = unfuseFMA; }
+ ConvertToLLVMPass(const ConvertToLLVMPass &pass) {
+ unfuseFMAOps = pass.unfuseFMAOps;
+ }
void getDependentDialects(DialectRegistry ®istry) const override {
registry.insert<LLVM::LLVMDialect>();
}
@@ -603,7 +604,10 @@
void runOnOperation() override;
private:
- LLVMCodegenOptions options_;
+ Option<bool> unfuseFMAOps{
+ *this, "unfuse-fma-ops",
+ llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+ llvm::cl::init(false)};
};
} // namespace
@@ -710,7 +714,7 @@
// Post conversion patterns.
{
OwningRewritePatternList postPatterns(&getContext());
- if (options_.unfuseFMAOps) {
+ if (unfuseFMAOps) {
populateUnfusedFMAOpsPassPatterns(&getContext(), postPatterns);
(void)applyPatternsAndFoldGreedily(module, std::move(postPatterns));
}
@@ -718,18 +722,15 @@
}
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
- LLVMCodegenOptions options) {
- return std::make_unique<ConvertToLLVMPass>(options);
+ bool unfuseFMAOps) {
+ return std::make_unique<ConvertToLLVMPass>(unfuseFMAOps);
}
static PassRegistration<ConvertToLLVMPass> pass(
"iree-codegen-convert-to-llvm",
"Perform final conversion from Linalg/HAL/Shape/Vector/Standard to "
"LLVMIR dialect",
- [] {
- return std::make_unique<ConvertToLLVMPass>(
- getLLVMCodegenOptionsFromClOptions());
- });
+ [] { return std::make_unique<ConvertToLLVMPass>(); });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
deleted file mode 100644
index b38838b..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
-
-#include "llvm/Support/CommandLine.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-static llvm::cl::opt<bool> clConvImg2ColConversion(
- "iree-codegen-linalg-to-llvm-conv-img2col-conversion",
- llvm::cl::desc("Enable rewriting linalg.conv_2d_input_nhwc_filter_hwcf "
- "linalg.generic that does img2col buffer packing + "
- "linag.matmul"),
- llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clUnfusedFMA(
- "iree-codegen-linalg-to-llvm-use-unfused-fma",
- llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
- llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clEnableLinalgOnTensorsToVectors(
- "iree-codegen-linalg-to-llvm-linalg-on-tensors-to-vectors",
- llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
- llvm::cl::init(false));
-
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions() {
- LLVMCodegenOptions options;
- options.useConvImg2Col = clConvImg2ColConversion;
- options.unfuseFMAOps = clUnfusedFMA;
- options.useLinalgOnTensorsToVectors = clEnableLinalgOnTensorsToVectors;
- return options;
-}
-
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
deleted file mode 100644
index b9fe214..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-#define IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-
-#include "llvm/ADT/SmallVector.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Options used to configure LLVM passes.
-struct LLVMCodegenOptions {
- bool useConvImg2Col = false;
- // Target specific options.
- bool unfuseFMAOps = false;
- bool useVectorToAarch64 = false;
- bool useLinalgOnTensorsToVectors = false;
-};
-
-// Returns LLVM CodeGen options from command-line options.
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions();
-
-} // namespace iree_compiler
-} // namespace mlir
-
-#endif // IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
index 65f83b5..fce276c 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
@@ -68,11 +68,24 @@
namespace {
struct TileAndVectorizeWorkgroups
: public PassWrapper<TileAndVectorizeWorkgroups, FunctionPass> {
+ TileAndVectorizeWorkgroups(bool vectorize = true)
+ : lowerToVectors(vectorize) {}
+ TileAndVectorizeWorkgroups(const TileAndVectorizeWorkgroups &pass) {
+ lowerToVectors = pass.lowerToVectors;
+ }
void getDependentDialects(DialectRegistry ®istry) const override {
registry.insert<linalg::LinalgDialect, AffineDialect, scf::SCFDialect,
vector::VectorDialect>();
}
void runOnFunction() override;
+
+ private:
+ /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+ /// VMVX backend uses the same lowering as the CPU pass but there is no
+ /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+ /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+ bool lowerToVectors;
};
} // namespace
@@ -209,6 +222,10 @@
}
}
+ if (!lowerToVectors) {
+ return;
+ }
+
// Apply vectorization patterns.
{
OwningRewritePatternList vectorizationPatterns(&getContext());
@@ -281,8 +298,9 @@
}
}
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass() {
- return std::make_unique<TileAndVectorizeWorkgroups>();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+ bool lowerToVectors) {
+ return std::make_unique<TileAndVectorizeWorkgroups>(lowerToVectors);
}
static PassRegistration<TileAndVectorizeWorkgroups> pass(
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
index 0354a35..d307ea2 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
@@ -32,9 +32,12 @@
LLVM::LLVMDialect>();
}
- LowerExecutableTargetPass(LLVMCodegenOptions options) : options(options) {}
- LowerExecutableTargetPass(const LowerExecutableTargetPass &pass)
- : options(pass.options) {}
+ LowerExecutableTargetPass(bool vectorize = true)
+ : lowerToVectors(vectorize) {}
+ LowerExecutableTargetPass(const LowerExecutableTargetPass &pass) {
+ invokeLoweringPipelines = pass.invokeLoweringPipelines;
+ lowerToVectors = pass.lowerToVectors;
+ }
void runOnOperation() override;
@@ -47,7 +50,12 @@
"can be set to false for testing purposes."),
llvm::cl::init(true)};
- LLVMCodegenOptions options;
+ /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+ /// VMVX backend uses the same lowering as the CPU pass but there is no
+ /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+ /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+ bool lowerToVectors;
};
} // namespace
@@ -64,19 +72,19 @@
OpPassManager executableLoweringPipeline(
IREE::HAL::ExecutableTargetOp::getOperationName());
executableLoweringPipeline.addPass(createSetNumWorkgroupsPass());
+ OpPassManager &nestedModulePM = executableLoweringPipeline.nest<ModuleOp>();
if (invokeLoweringPipelines) {
IREE::HAL::DispatchLoweringPassPipeline passPipeline =
setPipeline.getValue();
switch (passPipeline) {
case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault:
- addCPUDefaultPassPipeline(executableLoweringPipeline, options);
+ addCPUDefaultPassPipeline(nestedModulePM);
break;
case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization:
- addCPUVectorizationPassPipeline(executableLoweringPipeline, options);
+ addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors);
break;
}
- addLowerToLLVMPasses(executableLoweringPipeline, options);
}
if (failed(runPipeline(executableLoweringPipeline, targetOp))) {
@@ -85,18 +93,15 @@
}
std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options) {
- return std::make_unique<LowerExecutableTargetPass>(options);
+createLowerExecutableTargetPass(bool lowerToVectors) {
+ return std::make_unique<LowerExecutableTargetPass>(lowerToVectors);
}
static PassRegistration<LowerExecutableTargetPass> pass(
"iree-lower-executable-target-pass",
- "Perform lowering of executable target to export dialects. Currently "
- "lowers to LLVM dialect",
- [] {
- return std::make_unique<LowerExecutableTargetPass>(
- getLLVMCodegenOptionsFromClOptions());
- });
+ "Perform lowering of executable target using one of the "
+ "IREE::HAL::DispatchLoweringPassPipeline",
+ [] { return std::make_unique<LowerExecutableTargetPass>(); });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
index 9cc021b..3264e6d 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -26,77 +26,72 @@
}
void addCPUVectorizationPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
- nestedModulePM.addPass(createCanonicalizerPass());
+ bool lowerToVectors) {
+ passManager.addPass(createCanonicalizerPass());
// TODO(ataei): This causes segmentation fault on Android. Fix it and
// re-enable.
- // nestedModulePM.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
+ // passManager.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
- // TODO(ataei): We want to enable when tensor -> vector pass is fully
- // supported which requires first moving vector-tiling before this step.
- if (options.useLinalgOnTensorsToVectors) {
- nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
- }
// Use stack allocation on CPU side.
- addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
+ addLinalgBufferizePasses(passManager, cpuAllocationFunction);
// Tile and vectorize linalg ops.
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(
- createLinalgTileAndVectorizeWorkgroupsPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(
+ createLinalgTileAndVectorizeWorkgroupsPass(lowerToVectors));
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
- nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+ passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
}
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
- nestedModulePM.addPass(createCanonicalizerPass());
+void addCPUDefaultPassPipeline(OpPassManager &passManager) {
+ passManager.addPass(createCanonicalizerPass());
// Use stack allocation on CPU side.
- addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
- nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+ addLinalgBufferizePasses(passManager, cpuAllocationFunction);
+ passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
}
-void addLowerToLLVMPasses(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+static void addLowerToLLVMPasses(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
// Linalg -> SCF
- nestedModulePM.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+ passManager.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createCSEPass());
// SCF -> STD
- nestedModulePM.addNestedPass<FuncOp>(createLowerToCFGPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+ passManager.addNestedPass<FuncOp>(createLowerToCFGPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createCSEPass());
// Handled tensor-type constants.
- nestedModulePM.addPass(createTensorConstantBufferizePass());
- nestedModulePM.addPass(createFoldTensorExtractOpPass());
+ passManager.addPass(createTensorConstantBufferizePass());
+ passManager.addPass(createFoldTensorExtractOpPass());
// (HAL, IREE, Linalg, STD) -> LLVM
- nestedModulePM.addPass(createConvertToLLVMPass(options));
+ passManager.addPass(createConvertToLLVMPass());
- nestedModulePM.addPass(createCanonicalizerPass());
- nestedModulePM.addPass(createCSEPass());
+ passManager.addPass(createCanonicalizerPass());
+ passManager.addPass(createCSEPass());
}
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- passManager.addPass(createLowerExecutableTargetPass(options));
+void buildLLVMTransformPassPipeline(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
+ OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+ addLowerToLLVMPasses(nestedModulePM, options);
}
-static PassPipelineRegistration<> linalgLLVMVPipeline(
- "iree-codegen-linalg-to-llvm-pipeline",
- "Runs the progressive lowering pipeline from Linalg to LLVM",
- [](OpPassManager &passManager) {
- buildLLVMTransformPassPipeline(passManager,
- getLLVMCodegenOptionsFromClOptions());
- });
+static PassPipelineRegistration<LLVMTransformPassPipelineOptions>
+ linalgLLVMVPipeline(
+ "iree-codegen-linalg-to-llvm-pipeline",
+ "Runs the progressive lowering pipeline from Linalg to LLVM",
+ [](OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
+ buildLLVMTransformPassPipeline(passManager, options);
+ });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.h b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
index 7f626d6..252c2b0 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.h
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
@@ -7,9 +7,10 @@
#ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
#define IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassOptions.h"
namespace mlir {
namespace iree_compiler {
@@ -22,12 +23,9 @@
/// vector size.
std::unique_ptr<OperationPass<FuncOp>> createPadLinalgWorkgroupTilesPass();
-/// Distributes linalg ops among hal.interface.workgroup logical threads.
-std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLinalgTileAndDistributePass();
-
/// Vectorizes linalg ops executed in the same hal.interface.workgroup.
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+ bool lowerToVectors = true);
/// Replaces llvm.intr.fma with its unfused mul and add ops.
std::unique_ptr<FunctionPass> createUnfusedFMAOpsPass();
@@ -37,42 +35,50 @@
/// Performs the final conversion to LLVM dialect.
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
- LLVMCodegenOptions options);
+ bool unfuseFMAOps = false);
/// Pass to convert Linalg ops into vector operations.
std::unique_ptr<FunctionPass> createLinalgVectorizePass();
//===----------------------------------------------------------------------===//
-// Pass Pipelines for CPU Lowering
+// Pass Pipelines for CPU Lowering.
//===----------------------------------------------------------------------===//
/// Populates the passes to lower to scalars operations for linalg based
/// code-generation. This pipeline does not vectorize, but instead just converts
/// to memrefs
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
+void addCPUDefaultPassPipeline(OpPassManager &passManager);
/// Populates the passes needed to lower to vector operations using linalg based
/// progressive lowering with vectorization after bufferization.
void addCPUVectorizationPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
-
-/// Populates the passes needed to lower scalar/native vector code to LLVM
-/// Dialect.
-void addLowerToLLVMPasses(OpPassManager &passManager,
- LLVMCodegenOptions options);
+ bool lowerToVectors = true);
/// Pass to lower the module an hal.executable.target operation to external
/// dialect. Currently this pass lowers to LLVM dialect, but could be
/// generalized to lower to any "final" dialect like SPIR-V/NVVM, etc.
std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options);
+createLowerExecutableTargetPass(bool lowerToVectors = true);
+
+//===----------------------------------------------------------------------===//
+// Pass Pipelines for lowering to LLVM dialect.
+//===----------------------------------------------------------------------===//
+
+/// Options for LLVM pipeline.
+struct LLVMTransformPassPipelineOptions
+ : public PassPipelineOptions<LLVMTransformPassPipelineOptions> {
+ Option<bool> unfuseFMAOps{
+ *this, "unfuse-fma-ops",
+ llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+ llvm::cl::init(false)};
+};
/// Populates passes needed to lower a XLA HLO op to LLVM dialect via the
/// structured ops path. The pass manager `pm` in here should operate on the
/// module within the IREE::HAL::ExecutableOp.
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
+void buildLLVMTransformPassPipeline(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options);
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 60c27fd..7a74a17 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -57,7 +57,8 @@
inline void registerLinalgToLLVMPasses() {
static bool init_once = []() {
- createLowerExecutableTargetPass(LLVMCodegenOptions());
+ createLowerExecutableTargetPass();
+ createLinalgVectorizePass();
// LinalgToLLVM
createLinalgTileAndVectorizeWorkgroupsPass();
createUnfusedFMAOpsPass();
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 6c79804..ad08cb0 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -8,7 +8,6 @@
#include <cstdlib>
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
#include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
#include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h"
@@ -74,13 +73,13 @@
}
void buildTranslationPassPipeline(OpPassManager &passManager) override {
- auto codeGenOptions = getLLVMCodegenOptionsFromClOptions();
+ passManager.addPass(createLowerExecutableTargetPass());
// Set target specific options.
// TODO(ataei): This is temporary here, should move when target specific
// overrides options grows.
llvm::Triple triple(options_.targetTriple);
+ LLVMTransformPassPipelineOptions codeGenOptions;
if (triple.isWasm()) {
- // WebAssembly does not (yet) support FMA ops natively, so unfuse them.
codeGenOptions.unfuseFMAOps = true;
}
buildLLVMTransformPassPipeline(passManager, codeGenOptions);
diff --git a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
index 94829f1..9438429 100644
--- a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
@@ -27,44 +27,18 @@
namespace IREE {
namespace VMVX {
-// NOTE:
-// NOTE: THIS IS ALL JUST A HACK
-// NOTE:
-// NOTE: this entire pipeline needs to be reworked - it's been randomly
-// NOTE: constructed to "work" for a few samples by someone who does not
-// NOTE: understand the codegen system :)
-// NOTE:
-
static void buildVectorVMVXTransformPassPipeline(OpPassManager &passManager) {
+ // For now lower using the default CPU pass-pipeline which doesn't
+ // vectorize. When VMVX can lower vector operations, this can be relaxed.
+ passManager.addPass(
+ createLowerExecutableTargetPass(/*lowerToVectors=*/false));
+
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
// ---------------------------------------------------------------------------
- // Configuration
- // ---------------------------------------------------------------------------
-
- // TODO(#5925): This can also be modified to just use the dynamic pass
- // pipeline like the CPU side.
- // passManager.addPass(createMaterializeCPULaunchConfigurationPass());
- passManager.addPass(createSetNumWorkgroupsPass());
-
- // ---------------------------------------------------------------------------
// Linalg -> Vectors
// ---------------------------------------------------------------------------
- nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
-
- // Use stack allocation for transient buffers.
- WorkgroupMemoryAllocationFn allocationFn =
- [](OpBuilder &builder, Location loc, ArrayRef<int64_t> staticShape,
- Type elementType, ArrayRef<Value> dynamicSizes) {
- MemRefType allocType = MemRefType::get(staticShape, elementType);
- return builder.create<memref::AllocaOp>(loc, allocType, dynamicSizes);
- };
- addLinalgBufferizePasses(nestedModulePM, allocationFn);
- nestedModulePM.addPass(createPromoteBuffersToStackPass(
- /*maxAllocSizeInBytes=*/1 << 10, /*bitwidthOfIndexType=*/32,
- /*maxRankOfAllocatedMemRef=*/10));
-
nestedModulePM.addNestedPass<FuncOp>(createResolveShapeOpsPass());
nestedModulePM.addNestedPass<FuncOp>(
Shape::createCleanupShapePlaceholdersPass());
diff --git a/iree/test/e2e/llvm_specific/BUILD b/iree/test/e2e/llvm_specific/BUILD
deleted file mode 100644
index e79dfcf..0000000
--- a/iree/test/e2e/llvm_specific/BUILD
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright 2019 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# Tests for end-to-end IREE support specific to the LLVM lowering.
-# TODO(ravishankarm): Reorganize these tests.
-
-load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")
-
-package(
- default_visibility = ["//visibility:public"],
- features = ["layering_check"],
- licenses = ["notice"], # Apache 2.0
-)
-
-iree_check_single_backend_test_suite(
- name = "check_llvm-aot-conv_img2col",
- srcs = [
- "conv.mlir",
- ],
- compiler_flags = [
- "-iree-input-type=mhlo",
- "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true",
- ],
- driver = "dylib",
- target_backend = "dylib-llvm-aot",
-)
diff --git a/iree/test/e2e/llvm_specific/CMakeLists.txt b/iree/test/e2e/llvm_specific/CMakeLists.txt
deleted file mode 100644
index ab49aff..0000000
--- a/iree/test/e2e/llvm_specific/CMakeLists.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
-# iree/test/e2e/llvm_specific/BUILD #
-# #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
-# CMake-only content. #
-# #
-# To disable autogeneration for this file entirely, delete this header. #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_check_single_backend_test_suite(
- NAME
- check_llvm-aot-conv_img2col
- SRCS
- "conv.mlir"
- TARGET_BACKEND
- "dylib-llvm-aot"
- DRIVER
- "dylib"
- COMPILER_FLAGS
- "-iree-input-type=mhlo"
- "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true"
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/llvm_specific/conv.mlir b/iree/test/e2e/llvm_specific/conv.mlir
deleted file mode 100644
index e969347..0000000
--- a/iree/test/e2e/llvm_specific/conv.mlir
+++ /dev/null
@@ -1,257 +0,0 @@
-func @conv2d_nopadding() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0]]]]> : tensor<1x4x4x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[1310.0],[1466.0],[1622.0]],
- [[2090.0],[2246.0],[2402.0]]
- ]]> : tensor<1x2x3x1xf32>) : tensor<1x2x3x1xf32>
- return
-}
-
-func @conv2d_1452x3221_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0], [ 9.0, 10.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0], [39.0, 40.0]]]]> : tensor<1x4x5x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<1x4x5x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[ 600.0], [ 736.0], [ 872.0], [1008.0], [ 476.0]],
- [[1310.0], [1466.0], [1622.0], [1778.0], [ 805.0]],
- [[2090.0], [2246.0], [2402.0], [2558.0], [1135.0]],
- [[1080.0], [1152.0], [1224.0], [1296.0], [ 524.0]]]]> : tensor<1x4x5x1xf32>) : tensor<1x4x5x1xf32>
- return
-}
-
-func @conv2d_2451x2311_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
- [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
- [[11.0], [12.0], [13.0], [14.0], [15.0]],
- [[16.0], [17.0], [18.0], [19.0], [20.0]]],
- [[[21.0], [22.0], [23.0], [24.0], [25.0]],
- [[26.0], [27.0], [28.0], [29.0], [30.0]],
- [[31.0], [32.0], [33.0], [34.0], [35.0]],
- [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[1.0]], [[2.0]], [[3.0]]],
- [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
- [[160.0], [226.0], [247.0], [268.0], [160.0]],
- [[240.0], [331.0], [352.0], [373.0], [220.0]],
- [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
- [[[400.0], [541.0], [562.0], [583.0], [340.0]],
- [[480.0], [646.0], [667.0], [688.0], [400.0]],
- [[560.0], [751.0], [772.0], [793.0], [460.0]],
- [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
- return
-}
-
-func @conv2d_no_padding2() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0],
- [ 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0],
- [ 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0]],
- [[ 16.0, 17.0, 18.0],
- [ 19.0, 20.0, 21.0],
- [ 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0],
- [ 28.0, 29.0, 30.0]],
- [[ 31.0, 32.0, 33.0],
- [ 34.0, 35.0, 36.0],
- [ 37.0, 38.0, 39.0],
- [ 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0]],
- [[ 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0],
- [ 52.0, 53.0, 54.0],
- [ 55.0, 56.0, 57.0],
- [ 58.0, 59.0, 60.0]]],
- [[[ 61.0, 62.0, 63.0],
- [ 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0],
- [ 70.0, 71.0, 72.0],
- [ 73.0, 74.0, 75.0]],
- [[ 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0],
- [ 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0],
- [ 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0],
- [ 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0],
- [100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0]],
- [[106.0, 107.0, 108.0],
- [109.0, 110.0, 111.0],
- [112.0, 113.0, 114.0],
- [115.0, 116.0, 117.0],
- [118.0, 119.0, 120.0]]]]> : tensor<2x4x5x3xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]],
- [[ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0, 28.0, 29.0, 30.0],
- [ 31.0, 32.0, 33.0, 34.0, 35.0, 36.0]],
- [[ 37.0, 38.0, 39.0, 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0, 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0, 52.0, 53.0, 54.0]]],
- [[[ 55.0, 56.0, 57.0, 58.0, 59.0, 60.0],
- [ 61.0, 62.0, 63.0, 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0, 70.0, 71.0, 72.0]],
- [[ 73.0, 74.0, 75.0, 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0, 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0, 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0, 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0, 100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x3xf32>, tensor<2x3x3x6xf32>) -> tensor<2x3x3x6xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[16065.0, 16290.0, 16515.0, 16740.0, 16965.0, 17190.0],
- [18873.0, 19152.0, 19431.0, 19710.0, 19989.0, 20268.0],
- [21681.0, 22014.0, 22347.0, 22680.0, 23013.0, 23346.0]],
- [[30105.0, 30600.0, 31095.0, 31590.0, 32085.0, 32580.0],
- [32913.0, 33462.0, 34011.0, 34560.0, 35109.0, 35658.0],
- [35721.0, 36324.0, 36927.0, 37530.0, 38133.0, 38736.0]],
- [[44145.0, 44910.0, 45675.0, 46440.0, 47205.0, 47970.0],
- [46953.0, 47772.0, 48591.0, 49410.0, 50229.0, 51048.0],
- [49761.0, 50634.0, 51507.0, 52380.0, 53253.0, 54126.0]]],
- [[[72225.0, 73530.0, 74835.0, 76140.0, 77445.0, 78750.0],
- [75033.0, 76392.0, 77751.0, 79110.0, 80469.0, 81828.0],
- [77841.0, 79254.0, 80667.0, 82080.0, 83493.0, 84906.0]],
- [[86265.0, 87840.0, 89415.0, 90990.0, 92565.0, 94140.0],
- [89073.0, 90702.0, 92331.0, 93960.0, 95589.0, 97218.0],
- [91881.0, 93564.0, 95247.0, 96930.0, 98613.0, 100296.0]],
- [[100305.0, 102150.0, 103995.0, 105840.0, 107685.0, 109530.0],
- [103113.0, 105012.0, 106911.0, 108810.0, 110709.0, 112608.0],
- [105921.0, 107874.0, 109827.0, 111780.0, 113733.0, 115686.0]]]]> : tensor<2x3x3x6xf32>) : tensor<2x3x3x6xf32>
- return
-}
-
-func @conv_1d() {
- %inputs = iree.unfoldable_constant dense<2.0> : tensor<3x8x1xf32>
- %weights = iree.unfoldable_constant dense<2.0> : tensor<3x1x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 2 : i64,
- input_spatial_dimensions = dense<[1]> : tensor<1xi64>,
- kernel_input_feature_dimension = 1 : i64,
- kernel_output_feature_dimension = 2 : i64,
- kernel_spatial_dimensions = dense<[0]> : tensor<1xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 2 : i64,
- output_spatial_dimensions = dense<[1]> : tensor<1xi64>
- },
- feature_group_count = 1 : i64,
- padding = dense<0> : tensor<1x2xi64>,
- rhs_dilation = dense<1> : tensor<1xi64>,
- window_strides = dense<1> : tensor<1xi64>
- } : (tensor<3x8x1xf32>, tensor<3x1x1xf32>) -> tensor<3x6x1xf32>
- check.expect_almost_eq_const(%res, dense<12.0> : tensor<3x6x1xf32>) : tensor<3x6x1xf32>
- return
-}
-
-func @conv_3d() {
- %inputs = iree.unfoldable_constant dense<1.0> : tensor<2x8x8x8x3xf32>
- %weights = iree.unfoldable_constant dense<1.0> : tensor<2x2x2x3x2xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 4 : i64,
- input_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>,
- kernel_input_feature_dimension = 3 : i64,
- kernel_output_feature_dimension = 4 : i64,
- kernel_spatial_dimensions = dense<[0, 1, 2]> : tensor<3xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 4 : i64,
- output_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>
- },
- feature_group_count = 1 : i64,
- padding = dense<0> : tensor<3x2xi64>,
- rhs_dilation = dense<1> : tensor<3xi64>,
- window_strides = dense<1> : tensor<3xi64>
- } : (tensor<2x8x8x8x3xf32>, tensor<2x2x2x3x2xf32>) -> tensor<2x7x7x7x2xf32>
- check.expect_almost_eq_const(%res, dense<24.0> : tensor<2x7x7x7x2xf32>) : tensor<2x7x7x7x2xf32>
- return
-}
diff --git a/iree/tools/init_mlir_passes.h b/iree/tools/init_mlir_passes.h
index e1e337e..8c076d0 100644
--- a/iree/tools/init_mlir_passes.h
+++ b/iree/tools/init_mlir_passes.h
@@ -64,6 +64,7 @@
// SCF
registerSCFParallelLoopFusionPass();
registerSCFParallelLoopTilingPass();
+ registerSCFToStandardPass();
// Quant
quant::registerQuantPasses();