Convert func->util as part of input conversion. (#16411)
We now use `util.func` in place of `func.func` in all host code in the
compiler. flow/stream/hal executables continue to use `func.func` as
before for compatibility with upstream code and the benefits of the util
ops are fewer. Most code is still written against the
function/callable/call op interfaces so that we support initializers and
other future function types we may add. All tests have been updated to
use `util.func` for consistency even if the pass does still work with
`func.func`.
There's a few TODOs around better supporting tied function operands in
IPO and other passes but we aren't currently ever producing functions
with tied operands so they are hacked to bail in cases where they are
(IPO doesn't act on functions/calls with tied operands, etc).
diff --git a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/test/auto_input_conversion.mlir b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/test/auto_input_conversion.mlir
index 6776bcb..08b556b 100644
--- a/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/test/auto_input_conversion.mlir
+++ b/compiler/plugins/input/StableHLO/stablehlo-iree/Conversion/test/auto_input_conversion.mlir
@@ -2,7 +2,7 @@
// Check that the auto input conversion pipeline uses this plugin.
-// CHECK-LABEL: func.func @simple_add_stablehlo
+// CHECK-LABEL: util.func public @simple_add_stablehlo
// CHECK: arith.addi
func.func @simple_add_stablehlo(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> tensor<2x2xi32> {
%0 = stablehlo.add %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
diff --git a/compiler/plugins/input/TOSA/tosa-iree/InputConversion/test/auto_input_conversion.mlir b/compiler/plugins/input/TOSA/tosa-iree/InputConversion/test/auto_input_conversion.mlir
index 957f0d3..145f2d9 100644
--- a/compiler/plugins/input/TOSA/tosa-iree/InputConversion/test/auto_input_conversion.mlir
+++ b/compiler/plugins/input/TOSA/tosa-iree/InputConversion/test/auto_input_conversion.mlir
@@ -2,7 +2,7 @@
// Check that the auto input conversion pipeline uses this plugin.
-// CHECK-LABEL: func.func @simple_add_tosa
+// CHECK-LABEL: util.func public @simple_add_tosa
// CHECK: arith.addi
func.func @simple_add_tosa(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> tensor<2x2xi32> {
%0 = tosa.add %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
diff --git a/compiler/plugins/input/Torch/torch-iree/InputConversion/test/auto_input_conversion.mlir b/compiler/plugins/input/Torch/torch-iree/InputConversion/test/auto_input_conversion.mlir
index fc3f57e..9256519 100644
--- a/compiler/plugins/input/Torch/torch-iree/InputConversion/test/auto_input_conversion.mlir
+++ b/compiler/plugins/input/Torch/torch-iree/InputConversion/test/auto_input_conversion.mlir
@@ -2,7 +2,7 @@
// Check that the auto input conversion pipeline uses this plugin.
-// CHECK-LABEL: func.func @simple_add_torch
+// CHECK-LABEL: util.func public @simple_add_torch
// CHECK: arith.addf
func.func @simple_add_torch(%arg0: !torch.vtensor<[2],f32>, %arg1: !torch.vtensor<[2],f32>) -> !torch.vtensor<[2],f32> {
%int1 = torch.constant.int 1
@@ -12,7 +12,7 @@
// -----
-// CHECK-LABEL: func.func @simple_add_onnx
+// CHECK-LABEL: util.func public @simple_add_onnx
// CHECK: arith.addi
func.func @simple_add_onnx(%arg0: !torch.vtensor<[],si64>, %arg1: !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.1.0"} {
%0 = torch.operator "onnx.Add"(%arg0, %arg1) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/BUILD.bazel b/compiler/src/iree/compiler/Bindings/Native/Transforms/BUILD.bazel
index 33594a7..599c664 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/BUILD.bazel
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/BUILD.bazel
@@ -29,7 +29,6 @@
"//compiler/src/iree/compiler/Utils",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:AffineUtils",
- "@llvm-project//mlir:FuncDialect",
"@llvm-project//mlir:FunctionInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/CMakeLists.txt b/compiler/src/iree/compiler/Bindings/Native/Transforms/CMakeLists.txt
index 66617bf..571299e 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/CMakeLists.txt
@@ -22,7 +22,6 @@
DEPS
LLVMSupport
MLIRAffineUtils
- MLIRFuncDialect
MLIRFunctionInterfaces
MLIRIR
MLIRPass
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
index c8de124..385b836 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/ConvertStreamableOps.cpp
@@ -7,7 +7,8 @@
#include "iree/compiler/Bindings/Native/Transforms/Passes.h"
#include "iree/compiler/Dialect/Flow/IR/FlowDialect.h"
#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
+#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/MLIRContext.h"
@@ -80,7 +81,7 @@
// Converts a func.func with the iree.abi.streamable attribute into a flow.func
// and fixes all func.call ops to be flow.call across the module.
static std::optional<StreamableFunc>
-convertStreamableFunc(mlir::ModuleOp moduleOp, func::FuncOp funcOp,
+convertStreamableFunc(mlir::ModuleOp moduleOp, IREE::Util::FuncOp funcOp,
SymbolTable &symbolTable) {
OpBuilder moduleBuilder(funcOp);
auto functionType = funcOp.getFunctionType();
@@ -137,8 +138,18 @@
}
}
+ bool anyTiedOperands = false;
streamableFunc.tiedOperands.resize(functionType.getNumResults(),
IREE::Util::TiedOpInterface::kUntiedIndex);
+ if (auto tiedOperandsAttr = funcOp.getTiedOperandsAttr()) {
+ for (auto [resultIndex, tiedAttr] : llvm::enumerate(
+ funcOp.getTiedOperandsAttr().getAsRange<IntegerAttr>())) {
+ if (tiedAttr.getInt() != IREE::Util::TiedOpInterface::kUntiedIndex) {
+ streamableFunc.tiedOperands[resultIndex] = tiedAttr.getInt();
+ anyTiedOperands = true;
+ }
+ }
+ }
SmallVector<DictionaryAttr> funcResAttrs;
for (auto [i, resultType] : llvm::enumerate(functionType.getResults())) {
// Tensor results need to have their dynamic dimensions specified.
@@ -157,8 +168,8 @@
if (auto oldResAttrs = funcOp.getResultAttrDict(i)) {
// First check if the result is tied to an argument.
// We can use this to source the initial set of dynamic dimensions.
- if (auto tiedAttr = oldResAttrs.getAs<IntegerAttr>("iree.abi.tied")) {
- streamableFunc.tiedOperands[i] = tiedAttr.getInt();
+ int64_t tiedIndex = streamableFunc.tiedOperands[i];
+ if (tiedIndex != IREE::Util::TiedOpInterface::kUntiedIndex) {
if (!streamableFunc.resultDimsFunc &&
shapedType == functionType.getInput(i)) {
// Tied types match and we can infer the shape from that. This may
@@ -195,8 +206,7 @@
// Pass-through all other attrs we don't care about.
for (auto resAttr : oldResAttrs) {
- if (resAttr.getName() == "iree.abi.tied" ||
- resAttr.getName() == "iree.abi.dims") {
+ if (resAttr.getName() == "iree.abi.dims") {
continue;
}
newResAttrs.push_back(resAttr);
@@ -221,10 +231,13 @@
}
// Create the new streamable flow.func op at the same place as the original.
+ auto tiedOperandsAttr =
+ anyTiedOperands
+ ? moduleBuilder.getIndexArrayAttr(streamableFunc.tiedOperands)
+ : ArrayAttr{};
streamableFunc.funcOp = moduleBuilder.create<IREE::Flow::FuncOp>(
- funcOp.getLoc(), funcOp.getName(), functionType,
- moduleBuilder.getIndexArrayAttr(streamableFunc.tiedOperands), funcAttrs,
- funcArgAttrs, funcResAttrs);
+ funcOp.getLoc(), funcOp.getName(), functionType, tiedOperandsAttr,
+ funcAttrs, funcArgAttrs, funcResAttrs);
// Swap out the symbol in the symbol table.
symbolTable.erase(funcOp);
@@ -234,7 +247,7 @@
}
static LogicalResult convertStreamableCall(StreamableFunc &streamableFunc,
- func::CallOp callOp) {
+ IREE::Util::CallOp callOp) {
OpBuilder builder(callOp);
// Capture all argument dynamic dimensions.
@@ -253,9 +266,10 @@
// It should return the required number of dynamic dimensions.
SmallVector<Type> resultDimTypes(streamableFunc.requiredResultDims,
builder.getIndexType());
- auto calculateCallOp = builder.create<func::CallOp>(
- callOp.getLoc(), streamableFunc.resultDimsFunc, resultDimTypes,
- callOp.getOperands());
+ auto calculateCallOp = builder.create<IREE::Util::CallOp>(
+ callOp.getLoc(), resultDimTypes,
+ streamableFunc.resultDimsFunc.getLeafReference().getValue(),
+ callOp.getOperands(), ArrayAttr{});
llvm::append_range(resultDims, calculateCallOp.getResults());
} else {
// Get the shape dimensions from existing call arguments or tied operands.
@@ -301,7 +315,7 @@
static LogicalResult
convertStreamableCalls(mlir::ModuleOp moduleOp,
DenseMap<StringRef, StreamableFunc> &streamableFuncs) {
- auto walkResult = moduleOp.walk([&](func::CallOp callOp) {
+ auto walkResult = moduleOp.walk([&](IREE::Util::CallOp callOp) {
auto it = streamableFuncs.find(callOp.getCallee());
if (it != streamableFuncs.end()) {
if (failed(convertStreamableCall(it->second, callOp))) {
@@ -320,8 +334,8 @@
ConvertStreamableOpsPass(const ConvertStreamableOpsPass &pass) {}
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<func::FuncDialect, mlir::tensor::TensorDialect,
- IREE::Flow::FlowDialect>();
+ registry.insert<mlir::tensor::TensorDialect, IREE::Flow::FlowDialect,
+ IREE::Util::UtilDialect>();
}
StringRef getArgument() const override {
@@ -337,8 +351,8 @@
auto moduleOp = getOperation();
// Gather functions that need wrapping.
- SmallVector<func::FuncOp> originalFuncOps;
- for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
+ SmallVector<IREE::Util::FuncOp> originalFuncOps;
+ for (auto funcOp : moduleOp.getOps<IREE::Util::FuncOp>()) {
// Ignore functions already marked as having their ABI goo handled.
if (funcOp->hasAttr("iree.abi.streamable")) {
if (!funcOp.isExternal()) {
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
index cac0abc..9c22431 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/Passes.cpp
@@ -10,7 +10,6 @@
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "iree/compiler/Utils/PassUtils.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Transforms/Passes.h"
@@ -18,7 +17,7 @@
namespace mlir::iree_compiler::IREE::ABI {
using FunctionLikeNest =
- MultiOpNest<func::FuncOp, IREE::Util::InitializerOp, IREE::Util::FuncOp>;
+ MultiOpNest<IREE::Util::InitializerOp, IREE::Util::FuncOp>;
void buildTransformPassPipeline(OpPassManager &passManager,
const InvocationOptions &invocationOptions) {
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp b/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
index 6216eab..ed8af82 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/WrapEntryPoints.cpp
@@ -7,9 +7,10 @@
#include "iree/compiler/Bindings/Native/Transforms/Passes.h"
#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
+#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/MLIRContext.h"
@@ -59,13 +60,14 @@
}
// Creates the corresponding wrapper function for the given import function.
-static func::FuncOp
+static IREE::Util::FuncOp
createImportWrapperFunc(IREE::ABI::InvocationModel invocationModel,
- func::FuncOp importOp, FunctionType oldImportType,
- FunctionType newImportType, StringRef privateName) {
+ FunctionOpInterface importOp,
+ FunctionType oldImportType, FunctionType newImportType,
+ StringRef privateName) {
// Create the internal wrapper function with the original import signature.
auto wrapperOp =
- func::FuncOp::create(importOp.getLoc(), privateName, oldImportType);
+ IREE::Util::FuncOp::create(importOp.getLoc(), privateName, oldImportType);
wrapperOp.setPrivate();
// Copy arg/result attrs from the import op to the wrapper function.
@@ -202,8 +204,8 @@
arguments.push_back(signalFence);
// Make the call with the updated types.
- auto callOp =
- entryBuilder.create<func::CallOp>(importOp.getLoc(), importOp, arguments);
+ auto callOp = entryBuilder.create<IREE::Util::CallOp>(importOp.getLoc(),
+ importOp, arguments);
// If the call has side-effects then we need to wait on its signal fence on
// the host. This is because they may have launched a thread of their own to
@@ -235,7 +237,7 @@
}
}
- entryBuilder.create<func::ReturnOp>(importOp.getLoc(), results);
+ entryBuilder.create<IREE::Util::ReturnOp>(importOp.getLoc(), results);
return wrapperOp;
}
@@ -244,7 +246,7 @@
// new wrapper function.
static LogicalResult wrapImportFunc(IREE::ABI::InvocationModel invocationModel,
mlir::ModuleOp moduleOp,
- func::FuncOp importOp,
+ FunctionOpInterface importOp,
SymbolTable &symbolTable) {
// Replace all existing calls to the import to instead call the wrapper.
auto publicName = importOp.getName().str();
@@ -258,9 +260,8 @@
}
// Convert import signature types to those required by the binding ABI.
- auto oldImportType = importOp.getFunctionType();
SmallVector<Type> inputTypes;
- for (auto oldType : oldImportType.getInputs()) {
+ for (auto oldType : importOp.getArgumentTypes()) {
inputTypes.push_back(mapToABIType(oldType));
}
auto fenceType = IREE::HAL::FenceType::get(importOp.getContext());
@@ -274,7 +275,7 @@
break;
}
SmallVector<Type> resultTypes;
- for (auto oldType : oldImportType.getResults()) {
+ for (auto oldType : importOp.getResultTypes()) {
resultTypes.push_back(mapToABIType(oldType));
}
auto newImportType =
@@ -283,7 +284,8 @@
// Create the wrapper function that matches the original internal types but
// calls out to the updated import using ABI types.
auto wrapperOp = createImportWrapperFunc(
- invocationModel, importOp, oldImportType, newImportType, privateName);
+ invocationModel, importOp, cast<FunctionType>(importOp.getFunctionType()),
+ newImportType, privateName);
if (!wrapperOp)
return failure();
moduleOp.insert(++Block::iterator(importOp), wrapperOp);
@@ -344,7 +346,7 @@
// meaningful with them (like names).
static StringAttr
formatSourceDeclaration(IREE::ABI::InvocationModel invocationModel,
- func::FuncOp exportOp, StringRef publicName,
+ FunctionOpInterface exportOp, StringRef publicName,
ArrayAttr allArgAttrs, ArrayAttr allResultAttrs) {
std::string decl;
llvm::raw_string_ostream os(decl);
@@ -395,8 +397,8 @@
// These are attached to the exported function and can be queried at runtime
// with iree_vm_function_lookup_attr_by_name.
static void populateReflectionAttrs(IREE::ABI::InvocationModel invocationModel,
- func::FuncOp exportOp,
- func::FuncOp wrapperOp) {
+ FunctionOpInterface exportOp,
+ IREE::Util::FuncOp wrapperOp) {
auto *context = exportOp.getContext();
SmallVector<NamedAttribute> attrs;
@@ -441,9 +443,9 @@
}
// Creates the corresponding wrapper function for the given export function.
-static func::FuncOp
+static IREE::Util::FuncOp
createExportWrapperFunc(IREE::ABI::InvocationModel invocationModel,
- func::FuncOp exportOp, StringRef publicName) {
+ FunctionOpInterface exportOp, StringRef publicName) {
// Copy arg/result attrs from the export op to the wrapper function.
// We may want to remove them from the export but would need to filter.
SmallVector<DictionaryAttr> argAttrDict;
@@ -458,9 +460,8 @@
// NOTE: this is where we could change our signature to provide additional
// values from the runtime bindings as may be required - like semaphores for
// async behavior or cancellation.
- auto oldExportType = exportOp.getFunctionType();
SmallVector<Type> inputTypes;
- for (auto oldType : oldExportType.getInputs()) {
+ for (auto oldType : exportOp.getArgumentTypes()) {
inputTypes.push_back(mapToABIType(oldType));
}
auto fenceType = IREE::HAL::FenceType::get(exportOp.getContext());
@@ -476,7 +477,7 @@
break;
}
SmallVector<Type> resultTypes;
- for (auto oldType : oldExportType.getResults()) {
+ for (auto oldType : exportOp.getResultTypes()) {
resultTypes.push_back(mapToABIType(oldType));
}
auto newExportType =
@@ -485,7 +486,7 @@
// Update the import to the new type and mark it as being converted so we
// don't try to convert it again.
auto wrapperOp =
- func::FuncOp::create(exportOp.getLoc(), publicName, newExportType);
+ IREE::Util::FuncOp::create(exportOp.getLoc(), publicName, newExportType);
wrapperOp.setPublic();
wrapperOp->setAttr("iree.abi.stub", UnitAttr::get(exportOp.getContext()));
wrapperOp.setAllArgAttrs(argAttrDict);
@@ -536,6 +537,7 @@
}
// Marshal arguments.
+ auto oldExportType = cast<FunctionType>(exportOp.getFunctionType());
SmallVector<Value> arguments;
for (auto [argIndex, arg] : llvm::enumerate(
entryBlock->getArguments().slice(0, oldExportType.getNumInputs()))) {
@@ -555,8 +557,8 @@
}
// Make the call with the original types.
- auto callOp =
- entryBuilder.create<func::CallOp>(exportOp.getLoc(), exportOp, arguments);
+ auto callOp = entryBuilder.create<IREE::Util::CallOp>(exportOp.getLoc(),
+ exportOp, arguments);
auto asyncResults = llvm::to_vector(callOp.getResults());
// Insert a barrier if requested - all tensors will be calculated and the
@@ -602,7 +604,7 @@
}
}
- entryBuilder.create<func::ReturnOp>(exportOp.getLoc(), results);
+ entryBuilder.create<IREE::Util::ReturnOp>(exportOp.getLoc(), results);
return wrapperOp;
}
@@ -612,20 +614,16 @@
// bindings can also perform their own equivalent wrapping.
static LogicalResult wrapExportFunc(IREE::ABI::InvocationModel invocationModel,
mlir::ModuleOp moduleOp,
- func::FuncOp exportOp,
+ FunctionOpInterface exportOp,
SymbolTable &symbolTable) {
// Rename the original function so that our wrapper can use the original
// name in its public definition.
auto publicName = exportOp.getName().str();
auto privateName = "_" + publicName;
- auto privateNameAttr =
- mlir::StringAttr::get(exportOp.getContext(), privateName);
- if (failed(symbolTable.replaceAllSymbolUses(exportOp, privateNameAttr,
- moduleOp))) {
+ if (failed(symbolTable.rename(exportOp, privateName))) {
return exportOp.emitError() << "unknown symbol table op encountered; "
"cannot fix up symbol names";
}
- exportOp.setName(privateNameAttr);
exportOp.setPrivate();
// Create the wrapper function that conforms to the IREE native ABI and
@@ -634,7 +632,7 @@
createExportWrapperFunc(invocationModel, exportOp, publicName);
if (!wrapperOp)
return failure();
- moduleOp.insert(Block::iterator(exportOp), wrapperOp);
+ symbolTable.insert(wrapperOp, Block::iterator(exportOp));
return success();
}
@@ -652,8 +650,8 @@
}
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<func::FuncDialect, mlir::arith::ArithDialect,
- mlir::tensor::TensorDialect, IREE::HAL::HALDialect>();
+ registry.insert<mlir::arith::ArithDialect, mlir::tensor::TensorDialect,
+ IREE::HAL::HALDialect, IREE::Util::UtilDialect>();
}
StringRef getArgument() const override {
@@ -670,13 +668,13 @@
auto moduleOp = getOperation();
// Gather functions that need wrapping.
- SmallVector<func::FuncOp> importOps;
- SmallVector<func::FuncOp> exportOps;
- for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
+ SmallVector<FunctionOpInterface> importOps;
+ SmallVector<FunctionOpInterface> exportOps;
+ for (auto funcOp : moduleOp.getOps<IREE::Util::FuncOp>()) {
// Ignore functions already marked as having their ABI goo handled.
- if (funcOp->hasAttr("iree.abi.stub"))
+ if (funcOp->hasAttr("iree.abi.stub")) {
continue;
- if (funcOp.isExternal()) {
+ } else if (funcOp.isExternal()) {
// Imported function.
importOps.push_back(funcOp);
} else if (funcOp.isPublic()) {
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/convert_streamable_ops.mlir b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/convert_streamable_ops.mlir
index 0f74c2a..e889c0e 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/convert_streamable_ops.mlir
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/convert_streamable_ops.mlir
@@ -1,73 +1,9 @@
// RUN: iree-opt --iree-abi-convert-streamable-ops --cse --split-input-file %s --verify-diagnostics | FileCheck %s
-// Tests most of the features of the conversion.
-
-// CHECK: flow.func private @import(%arg0: tensor<?x2xi32> {some.arg_attr}, %arg1: tensor<?x4xf32>, %arg2: i32, %arg3: index) -> (%arg0, tensor<?x4xi8> {some.result_attr})
-func.func private @import(tensor<?x2xi32> {some.arg_attr}, tensor<?x4xf32>, i32, index) ->
- (tensor<?x2xi32> {iree.abi.tied = 0 : index}, tensor<?x4xi8> {iree.abi.dims = [3 : index], some.result_attr}) attributes {
- iree.abi.streamable
-}
-
-// CHECK: func.func private @caller
-func.func private @caller(%arg0: tensor<?x2xi32>, %arg1: tensor<?x4xf32>, %arg2: i32, %dim0: index) -> (tensor<?x2xi32>, tensor<?x4xi8>) {
- // CHECK-DAG: %[[ARG0_DIM0:.+]] = tensor.dim %arg0, %c0
- // CHECK-DAG: %[[ARG1_DIM0:.+]] = tensor.dim %arg1, %c0
- // CHECK: %[[RETS:.+]]:2 = flow.call @import(%arg0, %arg1, %arg2, %arg3) : (tensor<?x2xi32>{%[[ARG0_DIM0]]}, tensor<?x4xf32>{%[[ARG1_DIM0]]}, i32, index) -> (%arg0{%[[ARG0_DIM0]]}, tensor<?x4xi8>{%arg3})
- %0:2 = call @import(%arg0, %arg1, %arg2, %dim0) : (tensor<?x2xi32>, tensor<?x4xf32>, i32, index) -> (tensor<?x2xi32>, tensor<?x4xi8>)
- // CHECK: return %[[RETS]]#0, %[[RETS]]#1
- return %0#0, %0#1 : tensor<?x2xi32>, tensor<?x4xi8>
-}
-
-// -----
-
-// Verifies if a user doesn't specify untied result dynamic dims we error out.
-
-// expected-error @+1 {{missing dynamic dimensions on result 0}}
-func.func private @importMissingResultDims(tensor<?x?xi32>, index, index) -> tensor<?x?xf32> attributes {
- iree.abi.streamable
-}
-
-// -----
-
-// Tests that untied results with dynamic dimensions can resolve them.
-// Users need to specify in such cases.
-
-// CHECK: flow.func private @importWithResultDims(%arg0: tensor<?x?xi32>, %arg1: index, %arg2: index) -> tensor<?x?xf32>
-func.func private @importWithResultDims(tensor<?x?xi32>, index, index) -> (tensor<?x?xf32> {iree.abi.dims = [1 : index, 2 : index]}) attributes {
- iree.abi.streamable
-}
-
-// CHECK: func.func private @callerWithResultDims
-func.func private @callerWithResultDims(%arg0: tensor<?x?xi32>, %arg1: index, %arg2: index) -> tensor<?x?xf32> {
- // CHECK-DAG: %[[ARG0_DIM0:.+]] = tensor.dim %arg0, %c0
- // CHECK-DAG: %[[ARG0_DIM1:.+]] = tensor.dim %arg0, %c1
- // CHECK: %[[RET:.+]] = flow.call @importWithResultDims(%arg0, %arg1, %arg2) : (tensor<?x?xi32>{%[[ARG0_DIM0]], %[[ARG0_DIM1]]}, index, index) -> tensor<?x?xf32>{%arg1, %arg2}
- %0 = call @importWithResultDims(%arg0, %arg1, %arg2) : (tensor<?x?xi32>, index, index) -> tensor<?x?xf32>
- // CHECK: return %[[RET]]
- return %0 : tensor<?x?xf32>
-}
-
-// -----
-
-// Verifies if the user tries specifying result dims and a calculation function
-// we properly error.
-
-func.func private @calculateOverconstrainedResultDims(%arg0: index) -> index {
- return %arg0 : index
-}
-
-// expected-error @+1 {{cannot have both an explicit result shape calculation function}}
-func.func private @importOverconstrainedResultDims(index) -> (tensor<2x?xf32> {iree.abi.dims = [0 : index]}) attributes {
- iree.abi.streamable,
- iree.abi.result_dims = @calculateOverconstrainedResultDims
-}
-
-// -----
-
// Tests using a shape computation function for computing result dimensions.
-// CHECK: func.func private @calculateResultDims
-func.func private @calculateResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (index, index) {
+// CHECK: util.func private @calculateResultDims
+util.func private @calculateResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (index, index) {
// Could do math here, call other imported host functions, etc. Note that
// doing anything but tensor.dim on the tensor arguments will cause massive
// performance penalties and should always be avoided.
@@ -77,60 +13,22 @@
%c1 = arith.constant 1 : index
%arg0_dim1 = tensor.dim %arg0, %c1 : tensor<1x?xi32>
%arg2_dim0 = tensor.dim %arg2, %c0 : tensor<?xf32>
- return %arg0_dim1, %arg2_dim0 : index, index
+ util.return %arg0_dim1, %arg2_dim0 : index, index
}
// CHECK: flow.func private @importCustomResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>)
-func.func private @importCustomResultDims(tensor<1x?xi32>, i32, tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>) attributes {
+util.func private @importCustomResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>) attributes {
iree.abi.streamable,
iree.abi.result_dims = @calculateResultDims
}
-// CHECK: func.func private @callerCustomResultDims
-func.func private @callerCustomResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>) {
+// CHECK: util.func private @callerCustomResultDims
+util.func private @callerCustomResultDims(%arg0: tensor<1x?xi32>, %arg1: i32, %arg2: tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>) {
// CHECK-DAG: %[[ARG0_DIM1:.+]] = tensor.dim %arg0, %c1
// CHECK-DAG: %[[ARG2_DIM0:.+]] = tensor.dim %arg2, %c0
- // CHECK: %[[RET_DIMS:.+]]:2 = call @calculateResultDims(%arg0, %arg1, %arg2) : (tensor<1x?xi32>, i32, tensor<?xf32>) -> (index, index)
+ // CHECK: %[[RET_DIMS:.+]]:2 = util.call @calculateResultDims(%arg0, %arg1, %arg2) : (tensor<1x?xi32>, i32, tensor<?xf32>) -> (index, index)
// CHECK: %[[RETS:.+]]:2 = flow.call @importCustomResultDims(%arg0, %arg1, %arg2) : (tensor<1x?xi32>{%[[ARG0_DIM1]]}, i32, tensor<?xf32>{%[[ARG2_DIM0]]}) -> (tensor<2x?xf32>{%[[RET_DIMS]]#0}, tensor<?xi8>{%[[RET_DIMS]]#1})
- %0:2 = call @importCustomResultDims(%arg0, %arg1, %arg2) : (tensor<1x?xi32>, i32, tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>)
- // CHECK: return %[[RETS]]#0, %[[RETS]]#1
- return %0#0, %0#1 : tensor<2x?xf32>, tensor<?xi8>
-}
-
-// -----
-
-// Tests that results tied to operands get handled correctly and reuse the
-// argument shapes.
-
-// CHECK: flow.func private @importWithTies(%arg0: tensor<?x?xi32>) -> %arg0
-func.func private @importWithTies(tensor<?x?xi32>) -> (tensor<?x?xi32> {iree.abi.tied = 0 : index}) attributes {
- iree.abi.streamable
-}
-
-// CHECK: func.func private @callerWithTies
-func.func private @callerWithTies(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
- // CHECK-DAG: %[[ARG0_DIM0:.+]] = tensor.dim %arg0, %c0
- // CHECK-DAG: %[[ARG0_DIM1:.+]] = tensor.dim %arg0, %c1
- // CHECK: %[[RET:.+]] = flow.call @importWithTies(%arg0) : (tensor<?x?xi32>{%[[ARG0_DIM0]], %[[ARG0_DIM1]]}) -> %arg0{%[[ARG0_DIM0]], %[[ARG0_DIM1]]}
- %0 = call @importWithTies(%arg0) : (tensor<?x?xi32>) -> tensor<?x?xi32>
- // CHECK: return %[[RET]]
- return %0 : tensor<?x?xi32>
-}
-
-// -----
-
-// Tests that attrs we don't know about are passed through to the new ops.
-
-// CHECK: flow.func private @importPassThroughAttrs(%arg0: tensor<1xi32> {some.arg_attr}) -> (tensor<1xi8> {some.result_attr}) attributes {some.import_attr}
-func.func private @importPassThroughAttrs(tensor<1xi32> {some.arg_attr}) -> (tensor<1xi8> {some.result_attr}) attributes {
- iree.abi.streamable,
- some.import_attr
-}
-
-// CHECK: func.func private @callerPassThroughArgs
-func.func private @callerPassThroughArgs(%arg0: tensor<1xi32>) -> tensor<1xi8> {
- // CHECK: %[[RET:.+]] = flow.call @importPassThroughAttrs(%arg0) {some.call_attr} : (tensor<1xi32>) -> tensor<1xi8>
- %0 = call @importPassThroughAttrs(%arg0) {some.call_attr} : (tensor<1xi32>) -> tensor<1xi8>
- // CHECK: return %[[RET]]
- return %0 : tensor<1xi8>
+ %0:2 = util.call @importCustomResultDims(%arg0, %arg1, %arg2) : (tensor<1x?xi32>, i32, tensor<?xf32>) -> (tensor<2x?xf32>, tensor<?xi8>)
+ // CHECK: util.return %[[RETS]]#0, %[[RETS]]#1
+ util.return %0#0, %0#1 : tensor<2x?xf32>, tensor<?xi8>
}
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points.mlir b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points.mlir
index 5aa9da3..d1f4751 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points.mlir
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points.mlir
@@ -2,7 +2,7 @@
// Tests basic dynamic tensor I/O marshaling.
-// CHECK-LABEL: func.func @dynamicEntry(
+// CHECK-LABEL: util.func public @dynamicEntry(
// CHECK-SAME: %[[ARG0:.+]]: !hal.buffer_view, %[[ARG1:.+]]: !hal.buffer_view
// CHECK-SAME: -> (
// CHECK-SAME: !hal.buffer_view, !hal.buffer_view
@@ -15,20 +15,20 @@
// CHECK-NEXT: %[[ARG0_TENSOR:.+]] = hal.tensor.import %[[ARG0]] "input0" : !hal.buffer_view -> tensor<?x8x8x3xf32>{%[[ARG0_DIM0]]}
// CHECK-NEXT: %[[ARG1_DIM0:.+]] = hal.buffer_view.dim<%[[ARG1]] : !hal.buffer_view>[0] : index
// CHECK-NEXT: %[[ARG1_TENSOR:.+]] = hal.tensor.import %[[ARG1]] "input1" : !hal.buffer_view -> tensor<?x8x8x3xf32>{%[[ARG1_DIM0]]}
-// CHECK-NEXT: %[[RET_TENSORS:.+]]:2 = call @_dynamicEntry(%[[ARG0_TENSOR]], %[[ARG1_TENSOR]])
+// CHECK-NEXT: %[[RET_TENSORS:.+]]:2 = util.call @_dynamicEntry(%[[ARG0_TENSOR]], %[[ARG1_TENSOR]])
// CHECK: %[[RET0_DIM0:.+]] = tensor.dim %[[RET_TENSORS]]#0, %c0{{.*}} : tensor<?x8x8x3xf32>
// CHECK-NEXT: %[[RET0_VIEW:.+]] = hal.tensor.export %[[RET_TENSORS]]#0 "output0" : tensor<?x8x8x3xf32>{%[[RET0_DIM0]]} -> !hal.buffer_view
// CHECK: %[[RET1_DIM0:.+]] = tensor.dim %[[RET_TENSORS]]#1, %c0{{.*}} : tensor<?x8x8x3xf32>
// CHECK-NEXT: %[[RET1_VIEW:.+]] = hal.tensor.export %[[RET_TENSORS]]#1 "output1" : tensor<?x8x8x3xf32>{%[[RET1_DIM0]]} -> !hal.buffer_view
-// CHECK-NEXT: return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
+// CHECK-NEXT: util.return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
// CHECK-NEXT: }
-// CHECK-LABEL: func.func private @_dynamicEntry(
-func.func @dynamicEntry(%arg0: tensor<?x8x8x3xf32>, %arg1: tensor<?x8x8x3xf32>) ->
+// CHECK-LABEL: util.func private @_dynamicEntry(
+util.func public @dynamicEntry(%arg0: tensor<?x8x8x3xf32>, %arg1: tensor<?x8x8x3xf32>) ->
(tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>) {
%0 = arith.addf %arg0, %arg1 : tensor<?x8x8x3xf32>
%1 = arith.addf %0, %arg0 : tensor<?x8x8x3xf32>
- return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
+ util.return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
}
// -----
@@ -36,18 +36,18 @@
// Tests that an existing iree.reflection dictionary is merged with the new
// reflection information.
-// CHECK-LABEL: func.func @existingReflection
+// CHECK-LABEL: util.func public @existingReflection
// CHECK-SAME: iree.reflection =
// CHECK-SAME: iree.abi.declaration = "sync func @existingReflection
// CHECK-SAME: some.attr = 4 : index
-// CHECK: func.func private @_existingReflection
+// CHECK: util.func private @_existingReflection
// CHECK-NOT: iree.reflection = {some.attr = 4 : index}
-func.func @existingReflection() attributes {
+util.func public @existingReflection() attributes {
iree.reflection = {
some.attr = 4 : index
}
} {
- return
+ util.return
}
// -----
@@ -55,47 +55,47 @@
// Tests that iree.abi.declaration is added when needed and otherwise the user
// provided value is passed through.
-// CHECK-LABEL: func.func @existingDeclaration
+// CHECK-LABEL: util.func public @existingDeclaration
// CHECK-SAME: iree.reflection =
// CHECK-SAME: iree.abi.declaration = "some.python.thing(types_are_overrated)"
-func.func @existingDeclaration(%arg0: tensor<i32>) attributes {
+util.func public @existingDeclaration(%arg0: tensor<i32>) attributes {
iree.abi.declaration = "some.python.thing(types_are_overrated)"
} {
- return
+ util.return
}
// -----
// Tests that name overrides propagate into both metadata and assertion IR.
-// CHECK-LABEL: func.func @namedEntry
+// CHECK-LABEL: util.func public @namedEntry
// CHECK-SAME: iree.reflection =
// CHECK-SAME: iree.abi.declaration = "sync func @namedEntry(%my_input_0: tensor<3xf32>, %input1: tensor<3xf32>) -> (%my_output_0: tensor<3xf32>, %output1: tensor<3xf32>)"
-func.func @namedEntry(%arg0: tensor<3xf32> {iree.abi.name = "my_input_0"}, %arg1: tensor<3xf32>) ->
+util.func public @namedEntry(%arg0: tensor<3xf32> {iree.abi.name = "my_input_0"}, %arg1: tensor<3xf32>) ->
(tensor<3xf32> {iree.abi.name = "my_output_0"}, tensor<3xf32>) {
%0 = arith.addf %arg0, %arg1 : tensor<3xf32>
- return %0, %0 : tensor<3xf32>, tensor<3xf32>
+ util.return %0, %0 : tensor<3xf32>, tensor<3xf32>
}
// -----
// Tests that exports with encodings specified are propagated to the HAL ops.
-// CHECK-LABEL: func.func @exportEncodings
+// CHECK-LABEL: util.func public @exportEncodings
// CHECK-SAME: iree.abi.declaration = "sync func @exportEncodings(%input0: tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>}) -> (%output0: tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>})"
// CHECK: hal.tensor.import {{.+}} : !hal.buffer_view -> tensor<?x8x8x3xi32> as tensor<?x8x8x3xf32>{{.+}}
// CHECK: hal.tensor.export {{.+}} : tensor<?x8x8x3xi32> as tensor<?x8x8x3xf32>{{.+}} -> !hal.buffer_view
-// CHECK-LABEL: func.func private @_exportEncodings
-func.func @exportEncodings(%arg0: tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>}) -> (tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>}) {
- return %arg0 : tensor<?x8x8x3xf32>
+// CHECK-LABEL: util.func private @_exportEncodings
+util.func public @exportEncodings(%arg0: tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>}) -> (tensor<?x8x8x3xf32> {iree.abi.encoding = tensor<?x8x8x3xi32>}) {
+ util.return %arg0 : tensor<?x8x8x3xf32>
}
// -----
// Tests specifying explicit storage for specific function results.
-// CHECK-LABEL: func.func @outputStorage
+// CHECK-LABEL: util.func public @outputStorage
// CHECK-SAME: (%[[ARG0:[a-z0-9]+]]: !hal.buffer_view, %[[RET1_STORAGE:[a-z0-9]+]]: !hal.buffer)
// CHECK-SAME: -> (!hal.buffer_view, !hal.buffer_view) attributes {
// CHECK-SAME: iree.abi.stub
@@ -104,54 +104,54 @@
// CHECK-SAME: } {
// CHECK-NEXT: %[[ARG0_DIM0:.+]] = hal.buffer_view.dim<%[[ARG0]] : !hal.buffer_view>[0] : index
// CHECK-NEXT: %[[ARG0_TENSOR:.+]] = hal.tensor.import %[[ARG0]] "input0" : !hal.buffer_view -> tensor<?x8x8x3xf32>{%[[ARG0_DIM0]]}
-// CHECK-NEXT: %[[RET_TENSORS:.+]]:2 = call @_outputStorage(%[[ARG0_TENSOR]], %[[RET1_STORAGE]])
+// CHECK-NEXT: %[[RET_TENSORS:.+]]:2 = util.call @_outputStorage(%[[ARG0_TENSOR]], %[[RET1_STORAGE]])
// CHECK: %[[RET0_DIM0:.+]] = tensor.dim %[[RET_TENSORS]]#0, %c0{{.*}} : tensor<?x8x8x3xf32>
// CHECK-NEXT: %[[RET0_VIEW:.+]] = hal.tensor.export %[[RET_TENSORS]]#0 "output0" : tensor<?x8x8x3xf32>{%[[RET0_DIM0]]} -> !hal.buffer_view
// CHECK: %[[RET1_DIM0:.+]] = tensor.dim %[[RET_TENSORS]]#1, %c0{{.*}} : tensor<?x8x8x3xf32>
// CHECK-NEXT: %[[RET1_VIEW:.+]] = hal.tensor.export %[[RET_TENSORS]]#1 "output1" into(%[[RET1_STORAGE]] : !hal.buffer) : tensor<?x8x8x3xf32>{%[[RET1_DIM0]]} -> !hal.buffer_view
-// CHECK-NEXT: return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
+// CHECK-NEXT: util.return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
// CHECK-NEXT: }
-// CHECK-LABEL: func.func private @_outputStorage(
-func.func @outputStorage(%arg0: tensor<?x8x8x3xf32>, %ret1: !hal.buffer {iree.abi.output = 1 : index}) ->
+// CHECK-LABEL: util.func private @_outputStorage(
+util.func public @outputStorage(%arg0: tensor<?x8x8x3xf32>, %ret1: !hal.buffer {iree.abi.output = 1 : index}) ->
(tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>) {
%0 = arith.addf %arg0, %arg0 : tensor<?x8x8x3xf32>
%1 = arith.addf %0, %arg0 : tensor<?x8x8x3xf32>
- return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
+ util.return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
}
// -----
// Tests that functions already wrapped (iree.abi.stub present) are ignored.
-// CHECK-LABEL: func.func @wrappedAlready
+// CHECK-LABEL: util.func public @wrappedAlready
// CHECK-SAME: (%arg0: !hal.buffer_view) -> !hal.buffer_view
// CHECK-SAME: attributes {iree.abi.stub}
-func.func @wrappedAlready(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
- return %arg0 : !hal.buffer_view
+util.func public @wrappedAlready(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
+ util.return %arg0 : !hal.buffer_view
}
-// CHECK-NOT: func.func @_wrappedAlready
+// CHECK-NOT: util.func public @_wrappedAlready
// -----
// Tests that a function calling an exported function is redirected to the
// original unwrapped call.
-// CHECK-LABEL: func.func @exportA(%arg0: !hal.buffer_view) -> !hal.buffer_view
+// CHECK-LABEL: util.func public @exportA(%arg0: !hal.buffer_view) -> !hal.buffer_view
// CHECK: call @_exportA
-// CHECK: func.func private @_exportA(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32>
-// CHECK: return %arg0
-func.func @exportA(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
- return %arg0 : tensor<?x?xi32>
+// CHECK: util.func private @_exportA(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32>
+// CHECK: util.return %arg0
+util.func public @exportA(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
+ util.return %arg0 : tensor<?x?xi32>
}
-// CHECK: func.func @exportB(%arg0: !hal.buffer_view) -> !hal.buffer_view
-// CHECK: call @_exportB
-// CHECK: func.func private @_exportB(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32>
-// CHECK: call @_exportA
-func.func @exportB(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
- %0 = call @exportA(%arg0) : (tensor<?x?xi32>) -> tensor<?x?xi32>
- return %0 : tensor<?x?xi32>
+// CHECK: util.func public @exportB(%arg0: !hal.buffer_view) -> !hal.buffer_view
+// CHECK: util.call @_exportB
+// CHECK: util.func private @_exportB(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32>
+// CHECK: util.call @_exportA
+util.func public @exportB(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
+ %0 = util.call @exportA(%arg0) : (tensor<?x?xi32>) -> tensor<?x?xi32>
+ util.return %0 : tensor<?x?xi32>
}
// -----
@@ -159,44 +159,44 @@
// Tests that imported functions get converted to canonical ABI types and
// wrapper functions are built to preserve internal behavior.
-// CHECK-LABEL: func.func private @import(!hal.buffer_view) -> !hal.buffer_view
-func.func private @import(tensor<?x2xi32>) -> tensor<2x?xi32>
+// CHECK-LABEL: util.func private @import(%arg0: !hal.buffer_view) -> !hal.buffer_view
+util.func private @import(tensor<?x2xi32>) -> tensor<2x?xi32>
-// CHECK: func.func private @_import(%[[ARG_TENSOR:.+]]: tensor<?x2xi32>) -> tensor<2x?xi32> {
+// CHECK: util.func private @_import(%[[ARG_TENSOR:.+]]: tensor<?x2xi32>) -> tensor<2x?xi32> {
// CHECK: %[[ARG_DIM:.+]] = tensor.dim %[[ARG_TENSOR]], %c0
// CHECK: %[[ARG_VIEW:.+]] = hal.tensor.export %[[ARG_TENSOR]] : tensor<?x2xi32>{%[[ARG_DIM]]} -> !hal.buffer_view
-// CHECK: %[[RET_VIEW:.+]] = call @import(%[[ARG_VIEW]]) : (!hal.buffer_view) -> !hal.buffer_view
+// CHECK: %[[RET_VIEW:.+]] = util.call @import(%[[ARG_VIEW]]) : (!hal.buffer_view) -> !hal.buffer_view
// CHECK: %[[RET_DIM:.+]] = hal.buffer_view.dim<%[[RET_VIEW]] : !hal.buffer_view>[1]
// CHECK: %[[RET_TENSOR:.+]] = hal.tensor.import %[[RET_VIEW]] : !hal.buffer_view -> tensor<2x?xi32>{%[[RET_DIM]]}
-// CHECK: return %[[RET_TENSOR]]
+// CHECK: util.return %[[RET_TENSOR]]
// CHECK: }
-// CHECK: func.func private @caller(%arg0: tensor
-func.func private @caller(%arg0: tensor<?x2xi32>) -> tensor<2x?xi32> {
- // CHECK: call @_import(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
- %0 = call @import(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
- return %0 : tensor<2x?xi32>
+// CHECK: util.func private @caller(%arg0: tensor
+util.func private @caller(%arg0: tensor<?x2xi32>) -> tensor<2x?xi32> {
+ // CHECK: util.call @_import(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
+ %0 = util.call @import(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
+ util.return %0 : tensor<2x?xi32>
}
// -----
// Tests that imports with encodings specified are propagated to the HAL ops.
-// CHECK-LABEL: func.func private @importEncodings(!hal.buffer_view) -> !hal.buffer_view
-func.func private @importEncodings(tensor<?x2xi32> {iree.abi.encoding = tensor<?x2xf32>}) -> (tensor<2x?xi32> {iree.abi.encoding = tensor<2x?xf32>})
+// CHECK-LABEL: util.func private @importEncodings(%arg0: !hal.buffer_view) -> !hal.buffer_view
+util.func private @importEncodings(tensor<?x2xi32> {iree.abi.encoding = tensor<?x2xf32>}) -> (tensor<2x?xi32> {iree.abi.encoding = tensor<2x?xf32>})
-// CHECK: func.func private @_importEncodings(%[[ARG_TENSOR:.+]]: tensor<?x2xi32>) -> tensor<2x?xi32> {
+// CHECK: util.func private @_importEncodings(%[[ARG_TENSOR:.+]]: tensor<?x2xi32>) -> tensor<2x?xi32> {
// CHECK: %[[ARG_DIM:.+]] = tensor.dim %[[ARG_TENSOR]], %c0
// CHECK: %[[ARG_VIEW:.+]] = hal.tensor.export %[[ARG_TENSOR]] : tensor<?x2xi32>{%[[ARG_DIM]]} -> !hal.buffer_view
-// CHECK: %[[RET_VIEW:.+]] = call @importEncodings(%[[ARG_VIEW]]) : (!hal.buffer_view) -> !hal.buffer_view
+// CHECK: %[[RET_VIEW:.+]] = util.call @importEncodings(%[[ARG_VIEW]]) : (!hal.buffer_view) -> !hal.buffer_view
// CHECK: %[[RET_DIM:.+]] = hal.buffer_view.dim<%[[RET_VIEW]] : !hal.buffer_view>[1]
// CHECK: %[[RET_TENSOR:.+]] = hal.tensor.import %[[RET_VIEW]] : !hal.buffer_view -> tensor<2x?xi32>{%[[RET_DIM]]}
-// CHECK: return %[[RET_TENSOR]]
+// CHECK: util.return %[[RET_TENSOR]]
// CHECK: }
-// CHECK: func.func private @importEncodingsCaller(%arg0: tensor
-func.func private @importEncodingsCaller(%arg0: tensor<?x2xi32>) -> tensor<2x?xi32> {
+// CHECK: util.func private @importEncodingsCaller(%arg0: tensor
+util.func private @importEncodingsCaller(%arg0: tensor<?x2xi32>) -> tensor<2x?xi32> {
// CHECK: call @_importEncodings(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
- %0 = call @importEncodings(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
- return %0 : tensor<2x?xi32>
+ %0 = util.call @importEncodings(%arg0) : (tensor<?x2xi32>) -> tensor<2x?xi32>
+ util.return %0 : tensor<2x?xi32>
}
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
index f6ff6d1..5b37ee4 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-abi-wrap-entry-points{invocation-model=coarse-fences})' --split-input-file %s | FileCheck %s
-// CHECK-LABEL: func.func @asyncEntry(
+// CHECK-LABEL: util.func public @asyncEntry(
// CHECK-SAME: %[[ARG0:.+]]: !hal.buffer_view, %[[ARG1:.+]]: !hal.buffer_view, %[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence
// CHECK-SAME: -> (
// CHECK-SAME: !hal.buffer_view, !hal.buffer_view
@@ -11,93 +11,93 @@
// CHECK-SAME: } {
// CHECK-NEXT: %[[ARG0_TENSOR:.+]] = hal.tensor.import wait(%[[WAIT]]) => %[[ARG0]] "input0" : !hal.buffer_view -> tensor<4xf32>
// CHECK-NEXT: %[[ARG1_TENSOR:.+]] = hal.tensor.import wait(%[[WAIT]]) => %[[ARG1]] "input1" : !hal.buffer_view -> tensor<4xf32>
-// CHECK-NEXT: %[[RESULT_TENSORS:.+]]:2 = call @_asyncEntry(%[[ARG0_TENSOR]], %[[ARG1_TENSOR]])
+// CHECK-NEXT: %[[RESULT_TENSORS:.+]]:2 = util.call @_asyncEntry(%[[ARG0_TENSOR]], %[[ARG1_TENSOR]])
// CHECK-NEXT: %[[READY_TENSORS:.+]]:2 = hal.tensor.barrier join(%[[RESULT_TENSORS]]#0, %[[RESULT_TENSORS]]#1 : tensor<4xf32>, tensor<4xf32>) => %[[SIGNAL]] : !hal.fence
// CHECK-NEXT: %[[RET0_VIEW:.+]] = hal.tensor.export %[[READY_TENSORS]]#0 "output0" : tensor<4xf32> -> !hal.buffer_view
// CHECK-NEXT: %[[RET1_VIEW:.+]] = hal.tensor.export %[[READY_TENSORS]]#1 "output1" : tensor<4xf32> -> !hal.buffer_view
-// CHECK-NEXT: return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
+// CHECK-NEXT: util.return %[[RET0_VIEW]], %[[RET1_VIEW]] : !hal.buffer_view, !hal.buffer_view
// CHECK-NEXT: }
-// CHECK-LABEL: func.func private @_asyncEntry(
-func.func @asyncEntry(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
+// CHECK-LABEL: util.func private @_asyncEntry(
+util.func public @asyncEntry(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
%0 = arith.addf %arg0, %arg1 : tensor<4xf32>
%1 = arith.addf %0, %arg0 : tensor<4xf32>
- return %0, %1 : tensor<4xf32>, tensor<4xf32>
+ util.return %0, %1 : tensor<4xf32>, tensor<4xf32>
}
// -----
-// CHECK-LABEL: func.func @bareFunc
+// CHECK-LABEL: util.func public @bareFunc
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence)
-// CHECK: call @_bareFunc()
+// CHECK: util.call @_bareFunc()
// CHECK-NEXT: hal.fence.signal<%[[SIGNAL]] : !hal.fence>
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
-// CHECK-LABEL: func.func private @_bareFunc(
-func.func @bareFunc() {
- return
+// CHECK-LABEL: util.func private @_bareFunc(
+util.func public @bareFunc() {
+ util.return
}
// -----
-// CHECK-LABEL: func.func @primitiveArgOnly
+// CHECK-LABEL: util.func public @primitiveArgOnly
// CHECK-SAME: (%[[ARG0:.+]]: i32, %[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence)
-// CHECK-NEXT: call @_primitiveArgOnly(%[[ARG0]])
+// CHECK-NEXT: util.call @_primitiveArgOnly(%[[ARG0]])
// CHECK-NEXT: hal.fence.signal<%[[SIGNAL]] : !hal.fence>
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
-// CHECK-LABEL: func.func private @_primitiveArgOnly(
-func.func @primitiveArgOnly(%arg0: i32) {
+// CHECK-LABEL: util.func private @_primitiveArgOnly(
+util.func public @primitiveArgOnly(%arg0: i32) {
%0 = arith.addi %arg0, %arg0 : i32
util.optimization_barrier %0 : i32
- return
+ util.return
}
// -----
-// CHECK-LABEL: func.func @tensorArgOnly
+// CHECK-LABEL: util.func public @tensorArgOnly
// CHECK-SAME: (%[[ARG0:.+]]: !hal.buffer_view, %[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence)
// CHECK: %[[ARG0_TENSOR:.+]] = hal.tensor.import wait(%[[WAIT]]) => %[[ARG0]] "input0" : !hal.buffer_view -> tensor<4xf32>
-// CHECK-NEXT: call @_tensorArgOnly(%[[ARG0_TENSOR]])
+// CHECK-NEXT: util.call @_tensorArgOnly(%[[ARG0_TENSOR]])
// CHECK-NEXT: hal.fence.signal<%[[SIGNAL]] : !hal.fence>
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
-// CHECK-LABEL: func.func private @_tensorArgOnly(
-func.func @tensorArgOnly(%arg0: tensor<4xf32>) {
+// CHECK-LABEL: util.func private @_tensorArgOnly(
+util.func public @tensorArgOnly(%arg0: tensor<4xf32>) {
%0 = arith.addf %arg0, %arg0 : tensor<4xf32>
util.optimization_barrier %0 : tensor<4xf32>
- return
+ util.return
}
// -----
-// CHECK-LABEL: func.func @primitiveResultOnly
+// CHECK-LABEL: util.func public @primitiveResultOnly
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence) -> i32
-// CHECK-NEXT: %[[RESULT:.+]] = call @_primitiveResultOnly()
+// CHECK-NEXT: %[[RESULT:.+]] = util.call @_primitiveResultOnly()
// CHECK-NEXT: hal.fence.signal<%[[SIGNAL]] : !hal.fence>
-// CHECK-NEXT: return %[[RESULT]]
+// CHECK-NEXT: util.return %[[RESULT]]
-// CHECK-LABEL: func.func private @_primitiveResultOnly(
-func.func @primitiveResultOnly() -> i32 {
+// CHECK-LABEL: util.func private @_primitiveResultOnly(
+util.func public @primitiveResultOnly() -> i32 {
%0 = arith.constant 8 : i32
%1 = util.optimization_barrier %0 : i32
- return %1 : i32
+ util.return %1 : i32
}
// -----
-// CHECK-LABEL: func.func @tensorResultOnly
+// CHECK-LABEL: util.func public @tensorResultOnly
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence) -> !hal.buffer_view
-// CHECK-NEXT: %[[RESULT_TENSOR:.+]] = call @_tensorResultOnly()
+// CHECK-NEXT: %[[RESULT_TENSOR:.+]] = util.call @_tensorResultOnly()
// CHECK-NEXT: %[[READY_TENSOR:.+]] = hal.tensor.barrier join(%[[RESULT_TENSOR]] : tensor<4xf32>) => %[[SIGNAL]] : !hal.fence
// CHECK-NEXT: %[[RESULT_VIEW:.+]] = hal.tensor.export %[[READY_TENSOR]]
-// CHECK-NEXT: return %[[RESULT_VIEW]]
+// CHECK-NEXT: util.return %[[RESULT_VIEW]]
-// CHECK-LABEL: func.func private @_tensorResultOnly(
-func.func @tensorResultOnly() -> tensor<4xf32> {
+// CHECK-LABEL: util.func private @_tensorResultOnly(
+util.func public @tensorResultOnly() -> tensor<4xf32> {
%0 = arith.constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
%1 = util.optimization_barrier %0 : tensor<4xf32>
- return %1 : tensor<4xf32>
+ util.return %1 : tensor<4xf32>
}
// -----
@@ -108,13 +108,13 @@
// that is part of their ABI. Users can always manually specify the fences too
// though that's much more verbose.
-// CHECK-LABEL: func.func private @import(!hal.buffer_view, !hal.buffer_view, !hal.fence, !hal.fence) -> (!hal.buffer_view, !hal.buffer_view)
-func.func private @import(tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) attributes {
+// CHECK-LABEL: util.func private @import(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> (!hal.buffer_view, !hal.buffer_view)
+util.func private @import(tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) attributes {
iree.abi.model = "coarse-fences",
nosideeffects
}
-// CHECK: func.func private @_import(%[[ARG0_TENSOR:.+]]: tensor<?x2xi32>, %[[ARG1_TENSOR:.+]]: tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) {
+// CHECK: util.func private @_import(%[[ARG0_TENSOR:.+]]: tensor<?x2xi32>, %[[ARG1_TENSOR:.+]]: tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) {
// Prepare fences and put a barrier on input arguments:
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
@@ -129,7 +129,7 @@
// CHECK: %[[ARG1_VIEW:.+]] = hal.tensor.export %[[ARG_BARRIER]]#1 : tensor<?x3xi32>{%[[ARG1_DIM]]} -> !hal.buffer_view
// Call the import:
-// CHECK: %[[RET_VIEWS:.+]]:2 = call @import(%[[ARG0_VIEW]], %[[ARG1_VIEW]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (!hal.buffer_view, !hal.buffer_view, !hal.fence, !hal.fence) -> (!hal.buffer_view, !hal.buffer_view)
+// CHECK: %[[RET_VIEWS:.+]]:2 = util.call @import(%[[ARG0_VIEW]], %[[ARG1_VIEW]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (!hal.buffer_view, !hal.buffer_view, !hal.fence, !hal.fence) -> (!hal.buffer_view, !hal.buffer_view)
// Import output results from buffer views:
// CHECK: %[[RET0_DIM:.+]] = hal.buffer_view.dim<%[[RET_VIEWS]]#0 : !hal.buffer_view>[1]
@@ -137,53 +137,53 @@
// CHECK: %[[RET1_DIM:.+]] = hal.buffer_view.dim<%[[RET_VIEWS]]#1 : !hal.buffer_view>[1]
// CHECK: %[[RET1_TENSOR:.+]] = hal.tensor.import wait(%[[SIGNAL_FENCE]]) => %[[RET_VIEWS]]#1 : !hal.buffer_view -> tensor<3x?xi32>{%[[RET1_DIM]]}
-// CHECK: return %[[RET0_TENSOR]], %[[RET1_TENSOR]] : tensor<2x?xi32>, tensor<3x?xi32>
+// CHECK: util.return %[[RET0_TENSOR]], %[[RET1_TENSOR]] : tensor<2x?xi32>, tensor<3x?xi32>
// CHECK: }
-// CHECK: func.func private @caller(%[[ARG0_CALLER:.+]]: tensor<?x2xi32>, %[[ARG1_CALLER:.+]]: tensor<?x3xi32>)
-func.func private @caller(%arg0: tensor<?x2xi32>, %arg1: tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) {
- // CHECK: %[[RESULTS:.+]]:2 = call @_import(%[[ARG0_CALLER]], %[[ARG1_CALLER]]) : (tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>)
- %results:2 = call @import(%arg0, %arg1) : (tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>)
- // CHECK-NEXT: return %[[RESULTS]]#0, %[[RESULTS]]#1
- return %results#0, %results#1 : tensor<2x?xi32>, tensor<3x?xi32>
+// CHECK: util.func private @caller(%[[ARG0_CALLER:.+]]: tensor<?x2xi32>, %[[ARG1_CALLER:.+]]: tensor<?x3xi32>)
+util.func private @caller(%arg0: tensor<?x2xi32>, %arg1: tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>) {
+ // CHECK: %[[RESULTS:.+]]:2 = util.call @_import(%[[ARG0_CALLER]], %[[ARG1_CALLER]]) : (tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>)
+ %results:2 = util.call @import(%arg0, %arg1) : (tensor<?x2xi32>, tensor<?x3xi32>) -> (tensor<2x?xi32>, tensor<3x?xi32>)
+ // CHECK-NEXT: util.return %[[RESULTS]]#0, %[[RESULTS]]#1
+ util.return %results#0, %results#1 : tensor<2x?xi32>, tensor<3x?xi32>
}
// -----
// Tests a side-effect-free import that doesn't take/return reference types.
-// CHECK-LABEL: func.func private @importI32(i32, !hal.fence, !hal.fence) -> i32
-func.func private @importI32(i32) -> i32 attributes {
+// CHECK-LABEL: util.func private @importI32(%arg0: i32, %arg1: !hal.fence, %arg2: !hal.fence) -> i32
+util.func private @importI32(i32) -> i32 attributes {
iree.abi.model = "coarse-fences",
nosideeffects
}
// No fences required as the call has no side-effects and no async resources.
-// CHECK: func.func private @_importI32(%[[ARG0:.+]]: i32) -> i32 {
+// CHECK: util.func private @_importI32(%[[ARG0:.+]]: i32) -> i32 {
// CHECK: %[[WAIT_FENCE:.+]] = util.null : !hal.fence
// CHECK: %[[SIGNAL_FENCE:.+]] = util.null : !hal.fence
-// CHECK: %[[RET0:.+]] = call @importI32(%[[ARG0]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (i32, !hal.fence, !hal.fence) -> i32
-// CHECK: return %[[RET0]] : i32
+// CHECK: %[[RET0:.+]] = util.call @importI32(%[[ARG0]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (i32, !hal.fence, !hal.fence) -> i32
+// CHECK: util.return %[[RET0]] : i32
// CHECK: }
-// CHECK: func.func private @callerI32(%[[ARG0_CALLER:.+]]: i32)
-func.func private @callerI32(%arg0: i32) -> i32 {
- // CHECK: %[[RESULT:.+]] = call @_importI32(%[[ARG0_CALLER]]) : (i32) -> i32
- %result = call @importI32(%arg0) : (i32) -> i32
- // CHECK-NEXT: return %[[RESULT]]
- return %result : i32
+// CHECK: util.func private @callerI32(%[[ARG0_CALLER:.+]]: i32)
+util.func private @callerI32(%arg0: i32) -> i32 {
+ // CHECK: %[[RESULT:.+]] = util.call @_importI32(%[[ARG0_CALLER]]) : (i32) -> i32
+ %result = util.call @importI32(%arg0) : (i32) -> i32
+ // CHECK-NEXT: util.return %[[RESULT]]
+ util.return %result : i32
}
// -----
// Tests a side-effecting import that requires a host-side wait.
-// CHECK-LABEL: func.func private @importI32Effects(!hal.buffer_view, !hal.fence, !hal.fence) -> i32
-func.func private @importI32Effects(tensor<4xf32>) -> i32 attributes {
+// CHECK-LABEL: util.func private @importI32Effects(%arg0: !hal.buffer_view, %arg1: !hal.fence, %arg2: !hal.fence) -> i32
+util.func private @importI32Effects(tensor<4xf32>) -> i32 attributes {
iree.abi.model = "coarse-fences"
}
-// CHECK: func.func private @_importI32Effects(%[[ARG0_TENSOR:.+]]: tensor<4xf32>) -> i32 {
+// CHECK: util.func private @_importI32Effects(%[[ARG0_TENSOR:.+]]: tensor<4xf32>) -> i32 {
// Wait for the inputs to be ready and create the signal fence to wait on.
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
@@ -195,18 +195,18 @@
// CHECK: %[[ARG0_VIEW:.+]] = hal.tensor.export %[[ARG0_BARRIER]] : tensor<4xf32> -> !hal.buffer_view
// Make the import call:
-// CHECK: %[[RET0:.+]] = call @importI32Effects(%[[ARG0_VIEW]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (!hal.buffer_view, !hal.fence, !hal.fence) -> i32
+// CHECK: %[[RET0:.+]] = util.call @importI32Effects(%[[ARG0_VIEW]], %[[WAIT_FENCE]], %[[SIGNAL_FENCE]]) : (!hal.buffer_view, !hal.fence, !hal.fence) -> i32
// Perform host-side wait.
// CHECK: hal.fence.await until([%[[SIGNAL_FENCE]]])
-// CHECK: return %[[RET0]] : i32
+// CHECK: util.return %[[RET0]] : i32
// CHECK: }
-// CHECK: func.func private @callerI32Effects(%[[ARG0_CALLER:.+]]: tensor<4xf32>)
-func.func private @callerI32Effects(%arg0: tensor<4xf32>) -> i32 {
- // CHECK: %[[RESULT:.+]] = call @_importI32Effects(%[[ARG0_CALLER]]) : (tensor<4xf32>) -> i32
- %result = call @importI32Effects(%arg0) : (tensor<4xf32>) -> i32
- // CHECK-NEXT: return %[[RESULT]]
- return %result : i32
+// CHECK: util.func private @callerI32Effects(%[[ARG0_CALLER:.+]]: tensor<4xf32>)
+util.func private @callerI32Effects(%arg0: tensor<4xf32>) -> i32 {
+ // CHECK: %[[RESULT:.+]] = util.call @_importI32Effects(%[[ARG0_CALLER]]) : (tensor<4xf32>) -> i32
+ %result = util.call @importI32Effects(%arg0) : (tensor<4xf32>) -> i32
+ // CHECK-NEXT: util.return %[[RESULT]]
+ util.return %result : i32
}
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/BUILD.bazel b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/BUILD.bazel
index 754d084..154bf8d 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/BUILD.bazel
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/BUILD.bazel
@@ -30,7 +30,6 @@
"@llvm-project//llvm:Support",
"@llvm-project//mlir:AffineUtils",
"@llvm-project//mlir:ControlFlowDialect",
- "@llvm-project//mlir:FuncDialect",
"@llvm-project//mlir:FunctionInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/CMakeLists.txt b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/CMakeLists.txt
index 66b8fda..70fe8ae 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/CMakeLists.txt
@@ -22,7 +22,6 @@
LLVMSupport
MLIRAffineUtils
MLIRControlFlowDialect
- MLIRFuncDialect
MLIRFunctionInterfaces
MLIRIR
MLIRPass
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
index 8f7f989..323864e 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/Passes.cpp
@@ -10,7 +10,6 @@
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "iree/compiler/Utils/PassUtils.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Transforms/Passes.h"
@@ -18,7 +17,7 @@
namespace mlir::iree_compiler::IREE::TFLite {
using FunctionLikeNest =
- MultiOpNest<func::FuncOp, IREE::Util::InitializerOp, IREE::Util::FuncOp>;
+ MultiOpNest<IREE::Util::InitializerOp, IREE::Util::FuncOp>;
void buildTransformPassPipeline(OpPassManager &passManager) {
// Wraps the entry points in a "_tflite_xx" function and adds shape support.
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
index 95f9381..5e0c539 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/WrapEntryPoints.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/StringExtras.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/MLIRContext.h"
@@ -46,9 +45,8 @@
: public PassWrapper<WrapEntryPointsPass, OperationPass<ModuleOp>> {
public:
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<mlir::func::FuncDialect, mlir::arith::ArithDialect,
- mlir::tensor::TensorDialect, IREE::HAL::HALDialect,
- IREE::Util::UtilDialect>();
+ registry.insert<mlir::arith::ArithDialect, mlir::tensor::TensorDialect,
+ IREE::HAL::HALDialect, IREE::Util::UtilDialect>();
}
StringRef getArgument() const override {
@@ -60,13 +58,13 @@
"bindings";
}
- static StringAttr getArgId(func::FuncOp funcOp, int i) {
+ static StringAttr getArgId(IREE::Util::FuncOp funcOp, int i) {
StringAttr id =
funcOp.getArgAttrOfType<StringAttr>(i, "ml_program.identifier");
return id ? id : funcOp.getArgAttrOfType<StringAttr>(i, "iree.identifier");
}
- static StringAttr getResultId(func::FuncOp funcOp, int i) {
+ static StringAttr getResultId(IREE::Util::FuncOp funcOp, int i) {
StringAttr id =
funcOp.getResultAttrOfType<StringAttr>(i, "ml_program.identifier");
return id ? id
@@ -76,8 +74,8 @@
void runOnOperation() override {
auto moduleOp = getOperation();
- SmallVector<func::FuncOp> entryFuncOps;
- for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
+ SmallVector<IREE::Util::FuncOp> entryFuncOps;
+ for (auto funcOp : moduleOp.getOps<IREE::Util::FuncOp>()) {
if (funcOp.isPublic() && !funcOp->hasAttr("iree.abi.stub")) {
entryFuncOps.push_back(funcOp);
}
@@ -139,7 +137,7 @@
// Creates dynamic dim globals for each input and output of |funcOp|.
static std::pair<SmallVector<DynamicDims>, SmallVector<DynamicDims>>
createDynamicDimGlobals(Location loc, StringRef namePrefix,
- mlir::func::FuncOp funcOp, OpBuilder &moduleBuilder) {
+ IREE::Util::FuncOp funcOp, OpBuilder &moduleBuilder) {
auto funcType = funcOp.getFunctionType();
// TFLite requires the tensor names at runtime. If they've previously been
@@ -187,14 +185,14 @@
}
// Derives a shape calculation function from the given entry point |funcOp|.
- static mlir::func::FuncOp createShapeCalculationFunc(
- Location loc, StringRef namePrefix, mlir::func::FuncOp funcOp,
+ static IREE::Util::FuncOp createShapeCalculationFunc(
+ Location loc, StringRef namePrefix, IREE::Util::FuncOp funcOp,
ArrayRef<DynamicDims> inputDynamicDims,
ArrayRef<DynamicDims> outputDynamicDims,
IREE::Util::GlobalOp dirtyGlobalOp, OpBuilder &moduleBuilder) {
// Clone the entire entry function with all its IR.
auto calcFuncOp =
- cast<mlir::func::FuncOp>(moduleBuilder.clone(*funcOp.getOperation()));
+ cast<IREE::Util::FuncOp>(moduleBuilder.clone(*funcOp.getOperation()));
calcFuncOp.setName(
moduleBuilder.getStringAttr(namePrefix.str() + "_calculate_shapes"));
calcFuncOp.setPrivate();
@@ -242,7 +240,7 @@
// Replace each exit from the function with a storage back to the shape
// variables.
for (auto returnOp :
- llvm::to_vector(calcFuncOp.getOps<mlir::func::ReturnOp>())) {
+ llvm::to_vector(calcFuncOp.getOps<IREE::Util::ReturnOp>())) {
auto exitLoc = returnOp.getLoc();
OpBuilder exitBuilder(returnOp);
@@ -264,11 +262,11 @@
auto falseValue =
exitBuilder.createOrFold<arith::ConstantIntOp>(exitLoc, 0, 1);
dirtyGlobalOp.createStoreOp(exitLoc, falseValue, exitBuilder);
- exitBuilder.create<mlir::func::ReturnOp>(exitLoc);
+ exitBuilder.create<IREE::Util::ReturnOp>(exitLoc);
returnOp.erase();
}
- OpBuilder::atBlockBegin(returnBlock).create<mlir::func::ReturnOp>(loc);
+ OpBuilder::atBlockBegin(returnBlock).create<IREE::Util::ReturnOp>(loc);
return calcFuncOp;
}
@@ -363,7 +361,7 @@
void createQueryInputShapeFunc(Location loc, StringRef namePrefix,
ArrayRef<DynamicDims> inputDynamicDims,
OpBuilder &moduleBuilder) {
- auto queryFuncOp = moduleBuilder.create<mlir::func::FuncOp>(
+ auto queryFuncOp = moduleBuilder.create<IREE::Util::FuncOp>(
loc, namePrefix.str() + "_query_input_shape",
moduleBuilder.getFunctionType(/*inputs=*/
TypeRange{
@@ -385,7 +383,7 @@
entryBuilder);
auto exitBuilder = OpBuilder::atBlockBegin(exitBlock);
- exitBuilder.create<mlir::func::ReturnOp>(loc);
+ exitBuilder.create<IREE::Util::ReturnOp>(loc);
}
// Creates a function to resize |inputGlobalOps| and sets the |dirtyGlobalOp|
@@ -396,7 +394,7 @@
ArrayRef<DynamicDims> inputDynamicDims,
IREE::Util::GlobalOp dirtyGlobalOp,
OpBuilder &moduleBuilder) {
- auto resizeFuncOp = moduleBuilder.create<mlir::func::FuncOp>(
+ auto resizeFuncOp = moduleBuilder.create<IREE::Util::FuncOp>(
loc, namePrefix.str() + "_resize_input_shape",
moduleBuilder.getFunctionType(/*inputs=*/
TypeRange{
@@ -421,7 +419,7 @@
auto exitBuilder = OpBuilder::atBlockBegin(exitBlock);
auto trueValue = exitBuilder.createOrFold<arith::ConstantIntOp>(loc, 1, 1);
dirtyGlobalOp.createStoreOp(loc, trueValue, exitBuilder);
- exitBuilder.create<mlir::func::ReturnOp>(loc);
+ exitBuilder.create<IREE::Util::ReturnOp>(loc);
}
// Creates a function to query the |outputGlobalOps| at runtime by the
@@ -430,9 +428,9 @@
// func.func @_query_output_shape(%index : index, %shape : !util.list<index>)
void createQueryOutputShapeFunc(Location loc, StringRef namePrefix,
ArrayRef<DynamicDims> outputDynamicDims,
- mlir::func::FuncOp calculateShapeFuncOp,
+ IREE::Util::FuncOp calculateShapeFuncOp,
OpBuilder &moduleBuilder) {
- auto queryFuncOp = moduleBuilder.create<func::FuncOp>(
+ auto queryFuncOp = moduleBuilder.create<IREE::Util::FuncOp>(
loc, namePrefix.str() + "_query_output_shape",
moduleBuilder.getFunctionType(/*inputs=*/
TypeRange{
@@ -448,7 +446,8 @@
// Always call the recalculation function - it checks for whether it needs
// to run based on the dirty flag value.
- entryBuilder.create<mlir::func::CallOp>(loc, calculateShapeFuncOp);
+ entryBuilder.create<IREE::Util::CallOp>(loc, calculateShapeFuncOp,
+ ValueRange{});
auto *exitBlock = buildSwitch(
loc, entryBlock->getArgument(0), outputDynamicDims.size(),
@@ -458,7 +457,7 @@
entryBuilder);
auto exitBuilder = OpBuilder::atBlockBegin(exitBlock);
- exitBuilder.create<mlir::func::ReturnOp>(loc);
+ exitBuilder.create<IREE::Util::ReturnOp>(loc);
}
// Creates the corresponding wrapper function for the given entry point.
@@ -472,7 +471,7 @@
//
// NOTE: today we only support a single entry point; with minor tweaks we
// could fix this up to support multiple if we wanted.
- void createWrapperFunc(StringRef namePrefix, mlir::func::FuncOp entryFuncOp,
+ void createWrapperFunc(StringRef namePrefix, IREE::Util::FuncOp entryFuncOp,
ArrayRef<DynamicDims> inputDynamicDims,
ArrayRef<DynamicDims> outputDynamicDims,
IREE::Util::GlobalOp dirtyGlobalOp,
@@ -487,7 +486,7 @@
auto wrapperFuncType =
moduleBuilder.getFunctionType(inputTypes, outputTypes);
- auto wrapperFuncOp = moduleBuilder.create<mlir::func::FuncOp>(
+ auto wrapperFuncOp = moduleBuilder.create<IREE::Util::FuncOp>(
entryFuncOp.getLoc(), "_tflite_main", wrapperFuncType);
wrapperFuncOp.setPublic();
wrapperFuncOp.getOperation()->setAttr("iree.abi.stub",
@@ -527,7 +526,7 @@
TypeAttr::get(inputDynamicDims.tensorType), dynamicDims,
/*wait_fence=*/Value{}, /*name=*/nullptr));
}
- auto callOp = entryBuilder.create<mlir::func::CallOp>(
+ auto callOp = entryBuilder.create<IREE::Util::CallOp>(
entryFuncOp.getLoc(), entryFuncOp, callOperands);
SmallVector<Value> callResults;
for (auto [result, outputDynamicDims] :
@@ -554,11 +553,11 @@
entryBuilder.create<arith::ConstantIntOp>(entryFuncOp.getLoc(), 0, 1),
entryBuilder);
- entryBuilder.create<mlir::func::ReturnOp>(entryFuncOp.getLoc(),
+ entryBuilder.create<IREE::Util::ReturnOp>(entryFuncOp.getLoc(),
callResults);
}
- void wrapEntryPoint(mlir::func::FuncOp funcOp) {
+ void wrapEntryPoint(IREE::Util::FuncOp funcOp) {
auto loc = funcOp.getLoc();
auto namePrefix = ("_tflite_" + funcOp.getName()).str();
OpBuilder moduleBuilder(funcOp);
@@ -601,8 +600,8 @@
// Populates attributes on |wrapperFuncOp| to support runtime reflection like
// IO tensor names and quantization information.
- void populateReflectionAttrs(mlir::func::FuncOp entryFuncOp,
- mlir::func::FuncOp wrapperFuncOp) {
+ void populateReflectionAttrs(IREE::Util::FuncOp entryFuncOp,
+ IREE::Util::FuncOp wrapperFuncOp) {
SmallVector<NamedAttribute> attrs;
attrs.push_back(buildIONamesAttr(entryFuncOp));
// TODO(#3972): tfl.io.quant: quantization information.
@@ -615,7 +614,7 @@
// tfl.io.names=arg0;arg1;ret0;ret1
//
// Default names will be used if no identifiers are set on the function.
- NamedAttribute buildIONamesAttr(mlir::func::FuncOp entryFuncOp) {
+ NamedAttribute buildIONamesAttr(IREE::Util::FuncOp entryFuncOp) {
SmallVector<std::string> pieces;
for (int i = 0; i < entryFuncOp.getNumArguments(); ++i) {
auto identifierAttr = getArgId(entryFuncOp, i);
diff --git a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/test/wrap_entry_points.mlir b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/test/wrap_entry_points.mlir
index fc087c1..5d9216b 100644
--- a/compiler/src/iree/compiler/Bindings/TFLite/Transforms/test/wrap_entry_points.mlir
+++ b/compiler/src/iree/compiler/Bindings/TFLite/Transforms/test/wrap_entry_points.mlir
@@ -14,7 +14,7 @@
-// CHECK-LABEL: func.func private @_tflite_dynamicEntry_calculate_shapes() {
+// CHECK-LABEL: util.func private @_tflite_dynamicEntry_calculate_shapes() {
// Only recalculate shapes if the shapes are dirty.
// CHECK: %[[IS_DIRTY:.+]] = util.global.load @_tflite_dynamicEntry_shapes_dirty : i1
@@ -43,16 +43,16 @@
// Clear dirty bit now that the shapes have been recalculated.
// CHECK: util.global.store %false, @_tflite_dynamicEntry_shapes_dirty : i1
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
// Exit for when the shapes are not dirty and no work is needed.
// CHECK-NEXT: ^bb2:
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @_tflite_dynamicEntry_query_input_shape
+// CHECK-LABEL: util.func public @_tflite_dynamicEntry_query_input_shape
// CHECK-SAME: (%[[INDEX:.+]]: index, %[[LIST:.+]]: !util.list<index>)
// Query input0 shape:
@@ -82,12 +82,12 @@
// Invalid input index:
// CHECK: ^bb4:
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @_tflite_dynamicEntry_resize_input_shape
+// CHECK-LABEL: util.func public @_tflite_dynamicEntry_resize_input_shape
// CHECK-SAME: (%[[INDEX:.+]]: index, %[[LIST:.+]]: !util.list<index>)
// CHECK: %[[IS_0:.+]] = arith.cmpi eq, %[[INDEX]], %c0 : index
@@ -108,12 +108,12 @@
// Set the dirty flag so that shape calculation must run again.
// CHECK-NEXT: ^bb4:
// CHECK-NEXT: util.global.store %true, @_tflite_dynamicEntry_shapes_dirty : i1
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @_tflite_dynamicEntry_query_output_shape
+// CHECK-LABEL: util.func public @_tflite_dynamicEntry_query_output_shape
// CHECK-SAME: (%[[INDEX:.+]]: index, %[[LIST:.+]]: !util.list<index>)
// Recalculate shapes, if needed.
@@ -145,12 +145,12 @@
// CHECK-NEXT: cf.br ^bb4
// CHECK-NEXT: ^bb4:
-// CHECK-NEXT: return
+// CHECK-NEXT: util.return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @_tflite_main(
+// CHECK-LABEL: util.func public @_tflite_main(
// CHECK-SAME: %[[IN0_BUFFER:.+]]: !hal.buffer {iree.identifier = "input0"},
// CHECK-SAME: %[[IN1_BUFFER:.+]]: !hal.buffer {iree.identifier = "input1"})
// CHECK-SAME: -> (
@@ -172,7 +172,7 @@
// CHECK-NEXT: %[[IN1:.+]] = hal.tensor.import %[[IN1_BUFFER]] : !hal.buffer -> tensor<?x8x8x3xf32>{%[[IN1_DIM0]]}
// Call the original function with tensor arguments.
-// CHECK: %[[OUT:.+]]:2 = call @dynamicEntry(%[[IN0]], %[[IN1]]) : (tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>) -> (tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>)
+// CHECK: %[[OUT:.+]]:2 = util.call @dynamicEntry(%[[IN0]], %[[IN1]]) : (tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>) -> (tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>)
// Query output0 shape and get the HAL buffer to return.
// CHECK: %[[OUT0_DIM0:.+]] = tensor.dim %[[OUT]]#0, %c0 : tensor<?x8x8x3xf32>
@@ -187,13 +187,13 @@
// Clear shape dirty bit as we've updated the shapes unconditionally.
// CHECK-NEXT: util.global.store %false, @_tflite_dynamicEntry_shapes_dirty : i1
-// CHECK-NEXT: return %[[OUT0_BUFFER]], %[[OUT1_BUFFER]]
+// CHECK-NEXT: util.return %[[OUT0_BUFFER]], %[[OUT1_BUFFER]]
// CHECK-NEXT: }
-// CHECK-LABEL: func.func private @dynamicEntry(
-func.func @dynamicEntry(
+// CHECK-LABEL: util.func private @dynamicEntry(
+util.func public @dynamicEntry(
%arg0: tensor<?x8x8x3xf32> {iree.identifier = "input0"},
%arg1: tensor<?x8x8x3xf32> {iree.identifier = "input1"}
) -> (
@@ -204,13 +204,13 @@
%0 = arith.addf %arg0, %arg1 : tensor<?x8x8x3xf32>
// CHECK: = arith.addf
%1 = arith.addf %0, %arg0 : tensor<?x8x8x3xf32>
- // CHECK: return
- return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
+ // CHECK: util.return
+ util.return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
}
// -----
-// CHECK-LABEL: func.func @_tflite_main(
+// CHECK-LABEL: util.func public @_tflite_main(
// CHECK-SAME: %[[IN0_BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[IN1_BUFFER:.+]]: !hal.buffer)
// CHECK-SAME: -> (
@@ -223,7 +223,7 @@
// CHECK-SAME: }
// CHECK-SAME: } {
-func.func @dynamicEntryWithoutIdentifiers(
+util.func public @dynamicEntryWithoutIdentifiers(
%arg0: tensor<?x8x8x3xf32>,
%arg1: tensor<?x8x8x3xf32>
) -> (
@@ -234,6 +234,6 @@
%0 = arith.addf %arg0, %arg1 : tensor<?x8x8x3xf32>
// CHECK: = arith.addf
%1 = arith.addf %0, %arg0 : tensor<?x8x8x3xf32>
- // CHECK: return
- return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
+ // CHECK: util.return
+ util.return %0, %1 : tensor<?x8x8x3xf32>, tensor<?x8x8x3xf32>
}
diff --git a/compiler/src/iree/compiler/ConstEval/test/compile_regressions.mlir b/compiler/src/iree/compiler/ConstEval/test/compile_regressions.mlir
index 72064e4..ff74a8d 100644
--- a/compiler/src/iree/compiler/ConstEval/test/compile_regressions.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/compile_regressions.mlir
@@ -2,7 +2,7 @@
// Test case reduced by running the pass --iree-util-hoist-into-globals on the
// following (and then chang the check to a return):
-// func.func @i1_inline_constant() {
+// util.func public @i1_inline_constant() {
// %control = arith.constant dense<[true, false, true, false]> : tensor<4xi1>
// %a = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
// %b = arith.constant dense<[5, 6, 7, 8]> : tensor<4xi32>
@@ -18,7 +18,7 @@
// linalg.yield %0 : i32
// } -> tensor<4xi32>
// check.expect_eq_const(%c, dense<[1, 6, 3, 8]> : tensor<4xi32>) : tensor<4xi32>
-// return
+// util.return
// }
// CHECK-LABEL: module @hoisted_tensor_i1_input
@@ -27,9 +27,9 @@
#map = affine_map<(d0) -> (d0)>
module @hoisted_tensor_i1_input {
util.global private @hoisted : tensor<4xi32>
- func.func @i1_inline_constant() -> tensor<4xi32> {
+ util.func public @i1_inline_constant() -> tensor<4xi32> {
%hoisted = util.global.load @hoisted : tensor<4xi32>
- return %hoisted : tensor<4xi32>
+ util.return %hoisted : tensor<4xi32>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[true, false, true, false]> : tensor<4xi1>
diff --git a/compiler/src/iree/compiler/ConstEval/test/failing.mlir b/compiler/src/iree/compiler/ConstEval/test/failing.mlir
index c2fb16c..5fabbeb 100644
--- a/compiler/src/iree/compiler/ConstEval/test/failing.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/failing.mlir
@@ -6,9 +6,9 @@
module @eval_i64_scalar {
util.global private @offset : f64 = -2.0 : f64
util.global private @hoisted : f64
- func.func @main() -> f64 {
+ util.func public @main() -> f64 {
%hoisted = util.global.load @hoisted : f64
- return %hoisted : f64
+ util.return %hoisted : f64
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44.0 : f64
diff --git a/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir b/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir
index f662751..13429e8 100644
--- a/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/jit_globals.mlir
@@ -4,9 +4,9 @@
module @no_uninitialized {
util.global private @hoisted : tensor<5x6xf32> = dense<4.0> : tensor<5x6xf32>
- func.func @main() -> tensor<5x6xf32> {
+ util.func public @main() -> tensor<5x6xf32> {
%hoisted = util.global.load @hoisted : tensor<5x6xf32>
- return %hoisted : tensor<5x6xf32>
+ util.return %hoisted : tensor<5x6xf32>
}
}
@@ -17,9 +17,9 @@
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module @linalg_tensor_jit {
util.global private @hoisted : tensor<5x6xf32>
- func.func @main() -> tensor<5x6xf32> {
+ util.func public @main() -> tensor<5x6xf32> {
%hoisted = util.global.load @hoisted : tensor<5x6xf32>
- return %hoisted : tensor<5x6xf32>
+ util.return %hoisted : tensor<5x6xf32>
}
// CHECK-NOT: util.initializer
util.initializer attributes {iree.compiler.consteval} {
@@ -45,9 +45,9 @@
// CHECK: util.global private @{{.*}} = dense<2> : tensor<2xi32>
module @eval_splat_detection {
util.global private @hoisted : tensor<2xi32>
- func.func @main() -> tensor<2xi32> {
+ util.func public @main() -> tensor<2xi32> {
%hoisted = util.global.load @hoisted : tensor<2xi32>
- return %hoisted : tensor<2xi32>
+ util.return %hoisted : tensor<2xi32>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2, 2]> : tensor<2xi32>
@@ -61,9 +61,9 @@
// CHECK-LABEL: @eval_f16_tensor
module @eval_f16_tensor {
util.global private @hoisted : tensor<5x6xf16>
- func.func @main() -> tensor<5x6xf16> {
+ util.func public @main() -> tensor<5x6xf16> {
%hoisted = util.global.load @hoisted : tensor<5x6xf16>
- return %hoisted : tensor<5x6xf16>
+ util.return %hoisted : tensor<5x6xf16>
}
// expected-warning @+1 {{unsupported type for current jit configuration}}
util.initializer attributes {iree.compiler.consteval} {
@@ -78,9 +78,9 @@
// Not currently supported (initializer should remain)
module @eval_bf16_tensor {
util.global private @hoisted : tensor<5x6xbf16>
- func.func @main() -> tensor<5x6xbf16> {
+ util.func public @main() -> tensor<5x6xbf16> {
%hoisted = util.global.load @hoisted : tensor<5x6xbf16>
- return %hoisted : tensor<5x6xbf16>
+ util.return %hoisted : tensor<5x6xbf16>
}
// expected-warning @+1 {{unsupported type for current jit configuration}}
util.initializer attributes {iree.compiler.consteval} {
@@ -95,9 +95,9 @@
// CHECK: util.global private @{{.*}} = dense<[2.000000e+02, 3.200000e+03]> : tensor<2xf32>
module @eval_f32_tensor {
util.global private @hoisted : tensor<2xf32>
- func.func @main() -> tensor<2xf32> {
+ util.func public @main() -> tensor<2xf32> {
%hoisted = util.global.load @hoisted : tensor<2xf32>
- return %hoisted : tensor<2xf32>
+ util.return %hoisted : tensor<2xf32>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2.0e+2, 3.2e+3]> : tensor<2xf32>
@@ -110,9 +110,9 @@
// CHECK-LABEL: @eval_f64_tensor
module @eval_f64_tensor {
util.global private @hoisted : tensor<2xf64>
- func.func @main() -> tensor<2xf64> {
+ util.func public @main() -> tensor<2xf64> {
%hoisted = util.global.load @hoisted : tensor<2xf64>
- return %hoisted : tensor<2xf64>
+ util.return %hoisted : tensor<2xf64>
}
// expected-warning @+1 {{unsupported type for current jit configuration}}
util.initializer attributes {iree.compiler.consteval} {
@@ -127,9 +127,9 @@
// CHECK: util.global private @{{.*}} = dense<[false, true, false, true, true, false]> : tensor<6xi1>
module @eval_i1_tensor {
util.global private @hoisted : tensor<6xi1>
- func.func @main() -> tensor<6xi1> {
+ util.func public @main() -> tensor<6xi1> {
%hoisted = util.global.load @hoisted : tensor<6xi1>
- return %hoisted : tensor<6xi1>
+ util.return %hoisted : tensor<6xi1>
}
util.initializer attributes {iree.compiler.consteval} {
// Note that the level we are testing at is a bit odd in the way i1 vs
@@ -145,9 +145,9 @@
// CHECK-LABEL: @eval_i4_tensor
module @eval_i4_tensor {
util.global private @hoisted : tensor<5x6xi4>
- func.func @main() -> tensor<5x6xi4> {
+ util.func public @main() -> tensor<5x6xi4> {
%hoisted = util.global.load @hoisted : tensor<5x6xi4>
- return %hoisted : tensor<5x6xi4>
+ util.return %hoisted : tensor<5x6xi4>
}
// expected-warning @+1 {{unsupported type for current jit configuration}}
util.initializer attributes {iree.compiler.consteval} {
@@ -162,9 +162,9 @@
// CHECK: util.global private @{{.*}} = dense<[2, 3]> : tensor<2xi8>
module @eval_i8_tensor {
util.global private @hoisted : tensor<2xi8>
- func.func @main() -> tensor<2xi8> {
+ util.func public @main() -> tensor<2xi8> {
%hoisted = util.global.load @hoisted : tensor<2xi8>
- return %hoisted : tensor<2xi8>
+ util.return %hoisted : tensor<2xi8>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2, 3]> : tensor<2xi8>
@@ -178,9 +178,9 @@
// CHECK: util.global private @{{.*}} = dense<[2, 3]> : tensor<2xi16>
module @eval_i16_tensor {
util.global private @hoisted : tensor<2xi16>
- func.func @main() -> tensor<2xi16> {
+ util.func public @main() -> tensor<2xi16> {
%hoisted = util.global.load @hoisted : tensor<2xi16>
- return %hoisted : tensor<2xi16>
+ util.return %hoisted : tensor<2xi16>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2, 3]> : tensor<2xi16>
@@ -194,9 +194,9 @@
// CHECK: util.global private @{{.*}} = dense<[2, 3]> : tensor<2xi32>
module @eval_i32_tensor {
util.global private @hoisted : tensor<2xi32>
- func.func @main() -> tensor<2xi32> {
+ util.func public @main() -> tensor<2xi32> {
%hoisted = util.global.load @hoisted : tensor<2xi32>
- return %hoisted : tensor<2xi32>
+ util.return %hoisted : tensor<2xi32>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2, 3]> : tensor<2xi32>
@@ -210,9 +210,9 @@
// CHECK: util.global private @{{.*}} = dense<[2, 3]> : tensor<2xi64>
module @eval_i64_tensor {
util.global private @hoisted : tensor<2xi64>
- func.func @main() -> tensor<2xi64> {
+ util.func public @main() -> tensor<2xi64> {
%hoisted = util.global.load @hoisted : tensor<2xi64>
- return %hoisted : tensor<2xi64>
+ util.return %hoisted : tensor<2xi64>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<[2, 3]> : tensor<2xi64>
@@ -227,9 +227,9 @@
// CHECK: util.global private @{{.*}} = dense<2> : tensor<2xi64>
module @eval_i64_tensor_splat {
util.global private @hoisted : tensor<2xi64>
- func.func @main() -> tensor<2xi64> {
+ util.func public @main() -> tensor<2xi64> {
%hoisted = util.global.load @hoisted : tensor<2xi64>
- return %hoisted : tensor<2xi64>
+ util.return %hoisted : tensor<2xi64>
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant dense<2> : tensor<2xi64>
@@ -245,9 +245,9 @@
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module @serializable_attrs {
util.global private @hoisted : tensor<5x6xi8>
- func.func @main() -> tensor<5x6xi8> {
+ util.func public @main() -> tensor<5x6xi8> {
%hoisted = util.global.load @hoisted : tensor<5x6xi8>
- return %hoisted : tensor<5x6xi8>
+ util.return %hoisted : tensor<5x6xi8>
}
util.global private @constant = #util.byte_pattern<1> : tensor<5x6xi8>
// CHECK-NOT: util.initializer
diff --git a/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir b/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir
index 633c639..734165f 100644
--- a/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir
+++ b/compiler/src/iree/compiler/ConstEval/test/scalar_values.mlir
@@ -5,9 +5,9 @@
module @eval_i8_scalar {
util.global private @offset : i8 = -2 : i8
util.global private @hoisted : i8
- func.func @main() -> i8 {
+ util.func public @main() -> i8 {
%hoisted = util.global.load @hoisted : i8
- return %hoisted : i8
+ util.return %hoisted : i8
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44 : i8
@@ -24,9 +24,9 @@
module @eval_i16_scalar {
util.global private @offset : i16 = -2 : i16
util.global private @hoisted : i16
- func.func @main() -> i16 {
+ util.func public @main() -> i16 {
%hoisted = util.global.load @hoisted : i16
- return %hoisted : i16
+ util.return %hoisted : i16
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44 : i16
@@ -43,9 +43,9 @@
module @eval_i32_scalar {
util.global private @offset : i32 = -2 : i32
util.global private @hoisted : i32
- func.func @main() -> i32 {
+ util.func public @main() -> i32 {
%hoisted = util.global.load @hoisted : i32
- return %hoisted : i32
+ util.return %hoisted : i32
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44 : i32
@@ -62,9 +62,9 @@
module @eval_i64_scalar {
util.global private @offset : i64 = -2 : i64
util.global private @hoisted : i64
- func.func @main() -> i64 {
+ util.func public @main() -> i64 {
%hoisted = util.global.load @hoisted : i64
- return %hoisted : i64
+ util.return %hoisted : i64
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44 : i64
@@ -81,9 +81,9 @@
module @eval_f32_scalar {
util.global private @offset : f32 = -2.0 : f32
util.global private @hoisted : f32
- func.func @main() -> f32 {
+ util.func public @main() -> f32 {
%hoisted = util.global.load @hoisted : f32
- return %hoisted : f32
+ util.return %hoisted : f32
}
util.initializer attributes {iree.compiler.consteval} {
%cst = arith.constant 44.0 : f32
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/channel_creation.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/channel_creation.mlir
index fe9cbae..f09e8a1 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/channel_creation.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/channel_creation.mlir
@@ -7,11 +7,11 @@
// CHECK-NOT: util.global private @_mesh_mesh_1d_axes_0 {inlining_policy = #util.inline.never} : !flow.channel
mesh.mesh @mesh_1d(shape = 2)
- func.func @f(
+ util.func public @f(
%arg0 : tensor<1xi8>) -> tensor<1xi8> {
%0 = mesh.all_reduce %arg0 on @mesh_1d mesh_axes = [0] reduction = <sum>
: tensor<1xi8> -> tensor<1xi8>
- return %0 : tensor<1xi8>
+ util.return %0 : tensor<1xi8>
}
}
@@ -33,9 +33,9 @@
// CHECK: util.global.store %[[CHANNEL]], @_mesh_mesh_2d_axes_1 : !flow.channel
mesh.mesh @mesh_2d(shape = 3x4)
- func.func @f(%input : tensor<1xi8>) -> tensor<1xi8> {
+ util.func public @f(%input : tensor<1xi8>) -> tensor<1xi8> {
%out = mesh.all_reduce %input on @mesh_2d mesh_axes = [1] : tensor<1xi8> -> tensor<1xi8>
- return %out : tensor<1xi8>
+ util.return %out : tensor<1xi8>
}
}
@@ -65,9 +65,9 @@
// CHECK: util.global.store %[[CHANNEL]], @_mesh_mesh_4d_axes_2_1 : !flow.channel
mesh.mesh @mesh_4d(shape = 3x4x5x6)
- func.func @f(%input : tensor<1xi8>) -> tensor<1xi8> {
+ util.func public @f(%input : tensor<1xi8>) -> tensor<1xi8> {
%out = mesh.all_reduce %input on @mesh_4d mesh_axes = [2, 1] : tensor<1xi8> -> tensor<1xi8>
- return %out : tensor<1xi8>
+ util.return %out : tensor<1xi8>
}
}
@@ -80,10 +80,10 @@
// CHECK-DAG: util.global private @_mesh_mesh_2d_axes_1 {inlining_policy = #util.inline.never} : !flow.channel
mesh.mesh @mesh_2d(shape = 3x4)
- func.func @f(%input : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
+ util.func public @f(%input : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
%out0 = mesh.all_reduce %input on @mesh_2d mesh_axes = [0] : tensor<1xi8> -> tensor<1xi8>
%out1 = mesh.all_reduce %input on @mesh_2d mesh_axes = [1] : tensor<1xi8> -> tensor<1xi8>
- return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
+ util.return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
}
}
@@ -95,10 +95,10 @@
// CHECK: util.global private @_mesh_mesh_2d_axes_0 {inlining_policy = #util.inline.never} : !flow.channel
mesh.mesh @mesh_2d(shape = 3x4)
- func.func @f(%input0 : tensor<1xi8>, %input1 : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
+ util.func public @f(%input0 : tensor<1xi8>, %input1 : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
%out0 = mesh.all_reduce %input0 on @mesh_2d mesh_axes = [0] : tensor<1xi8> -> tensor<1xi8>
%out1 = mesh.all_reduce %input1 on @mesh_2d mesh_axes = [0] : tensor<1xi8> -> tensor<1xi8>
- return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
+ util.return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
}
}
@@ -122,9 +122,9 @@
// CHECK: util.global.store %[[CHANNEL]], @_mesh_mesh2_axes_1 : !flow.channel
mesh.mesh @mesh2(shape = 3x4)
- func.func @f(%input0 : tensor<1xi8>, %input1 : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
+ util.func public @f(%input0 : tensor<1xi8>, %input1 : tensor<1xi8>) -> (tensor<1xi8>, tensor<1xi8>) {
%out0 = mesh.all_reduce %input0 on @mesh1 mesh_axes = [0] : tensor<1xi8> -> tensor<1xi8>
%out1 = mesh.all_reduce %input1 on @mesh2 mesh_axes = [1] : tensor<1xi8> -> tensor<1xi8>
- return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
+ util.return %out0, %out1 : tensor<1xi8>, tensor<1xi8>
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/collectives.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/collectives.mlir
index 60d16ab..ef7eb2d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/collectives.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/MeshToFlow/test/collectives.mlir
@@ -2,8 +2,8 @@
mesh.mesh @mesh_2d(shape = 3x4)
-// CHECK-LABEL: func @all_gather_non_default_channel
-func.func @all_gather_non_default_channel(
+// CHECK-LABEL: util.func public @all_gather_non_default_channel
+ util.func public @all_gather_non_default_channel(
// CHECK-SAME: %[[ARG:.*]]: tensor<3x4xi8>
%arg0 : tensor<3x4xi8>) -> tensor<3x16xi8> {
// CHECK-DAG: %[[CHANNEL:.*]] = util.global.load @_mesh_mesh_2d_axes_1 : !flow.channel
@@ -19,16 +19,16 @@
// CHECK-SAME: ins(%[[ALL_GATHER_RES]] : tensor<16x3xi8>) outs(%[[RES_INIT_VAL]] : tensor<3x16xi8>) permutation = [1, 0]
%0 = mesh.all_gather %arg0 on @mesh_2d mesh_axes = [1] gather_axis = 1
: tensor<3x4xi8> -> tensor<3x16xi8>
- // CHECK: return %[[RES]] : tensor<3x16xi8>
- return %0 : tensor<3x16xi8>
+ // CHECK: util.return %[[RES]] : tensor<3x16xi8>
+ util.return %0 : tensor<3x16xi8>
}
// -----
mesh.mesh @mesh_1d(shape = 2)
-// CHECK-LABEL: func @all_reduce_sum_default_channel
-func.func @all_reduce_sum_default_channel(
+// CHECK-LABEL: util.func public @all_reduce_sum_default_channel
+ util.func public @all_reduce_sum_default_channel(
// CHECK-SAME: %[[ARG:.*]]: tensor<1xi8>
%arg0 : tensor<1xi8>) -> tensor<1xi8> {
// CHECK: %[[CHANNEL:.*]] = flow.channel.default : !flow.channel
@@ -37,16 +37,16 @@
// CHECK-SAME: (tensor<1xi8>, tensor<1xi8>, !flow.channel) -> %[[INITIAL_VAL]] as tensor<1xi8>
%0 = mesh.all_reduce %arg0 on @mesh_1d mesh_axes = [0]
: tensor<1xi8> -> tensor<1xi8>
- // CHECK: return %[[RES]] : tensor<1xi8>
- return %0 : tensor<1xi8>
+ // CHECK: util.return %[[RES]] : tensor<1xi8>
+ util.return %0 : tensor<1xi8>
}
// -----
mesh.mesh @mesh_2d(shape = 2x2)
-// CHECK-LABEL: func @all_reduce_min_non_default_channel
-func.func @all_reduce_min_non_default_channel(
+// CHECK-LABEL: util.func public @all_reduce_min_non_default_channel
+ util.func public @all_reduce_min_non_default_channel(
// CHECK-SAME: %[[ARG:.*]]: tensor<1xi8>
%arg0 : tensor<1xi8>) -> tensor<1xi8> {
// CHECK-DAG: %[[CHANNEL:.*]] = util.global.load @_mesh_mesh_2d_axes_1_0 : !flow.channel
@@ -55,16 +55,16 @@
// CHECK-SAME: (tensor<1xi8>, tensor<1xi8>, !flow.channel) -> %[[INITIAL_VAL]] as tensor<1xi8>
%0 = mesh.all_reduce %arg0 on @mesh_2d mesh_axes = [1, 0] reduction = <min>
: tensor<1xi8> -> tensor<1xi8>
- // CHECK: return %[[RES]] : tensor<1xi8>
- return %0 : tensor<1xi8>
+ // CHECK: util.return %[[RES]] : tensor<1xi8>
+ util.return %0 : tensor<1xi8>
}
// -----
mesh.mesh @mesh_1d(shape = 2)
-// CHECK-LABEL: func @all_reduce_f32
-func.func @all_reduce_f32(
+// CHECK-LABEL: util.func public @all_reduce_f32
+ util.func public @all_reduce_f32(
// CHECK-SAME: %[[ARG:.*]]: tensor<1xf32>
%arg0 : tensor<1xf32>) -> tensor<1xf32> {
// CHECK-DAG: %[[CHANNEL:.*]] = flow.channel.default : !flow.channel
@@ -73,29 +73,29 @@
// CHECK-SAME: (tensor<1xf32>, tensor<1xf32>, !flow.channel) -> %[[INITIAL_VAL]] as tensor<1xf32>
%0 = mesh.all_reduce %arg0 on @mesh_1d mesh_axes = [0]
: tensor<1xf32> -> tensor<1xf32>
- // CHECK: return %[[RES]] : tensor<1xf32>
- return %0 : tensor<1xf32>
+ // CHECK: util.return %[[RES]] : tensor<1xf32>
+ util.return %0 : tensor<1xf32>
}
// -----
mesh.mesh @mesh_1d(shape = 2)
-// CHECK-LABEL: func @process_linear_index
-func.func @process_linear_index() -> index {
+// CHECK-LABEL: util.func public @process_linear_index
+ util.func public @process_linear_index() -> index {
// CHECK: %[[CHANNEL:.*]] = flow.channel.default : !flow.channel
// CHECK: %[[RES:.*]] = flow.channel.rank %[[CHANNEL]] : index
%0 = mesh.process_linear_index on @mesh_1d : index
- // CHECK: return %[[RES]] : index
- return %0 : index
+ // CHECK: util.return %[[RES]] : index
+ util.return %0 : index
}
// -----
mesh.mesh @mesh_3d(shape = 2x3x4)
-// CHECK-LABEL: func @all_to_all_non_default_channel
-func.func @all_to_all_non_default_channel(
+// CHECK-LABEL: util.func public @all_to_all_non_default_channel
+ util.func public @all_to_all_non_default_channel(
// CHECK-SAME: %[[ARG:.*]]: tensor<1x12x3x4x5xf32>
%arg0 : tensor<1x12x3x4x5xf32>) -> tensor<1x2x3x24x5xf32> {
// CHECK: %[[CHANNEL:.*]] = util.global.load @_mesh_mesh_3d_axes_1_0 : !flow.channel
@@ -113,16 +113,16 @@
// CHECK-SAME-LITERAL: [[0], [1], [2], [3, 4], [5]] : tensor<1x2x3x6x4x5xf32> into tensor<1x2x3x24x5xf32>
%0 = mesh.all_to_all %arg0 on @mesh_3d mesh_axes = [1, 0] split_axis = 1 concat_axis = 3
: tensor<1x12x3x4x5xf32> -> tensor<1x2x3x24x5xf32>
- // CHECK: return %[[COLLAPSED_SPLIT_COUNT_INTO_CONCAT_AXIS]] : tensor<1x2x3x24x5xf32>
- return %0 : tensor<1x2x3x24x5xf32>
+ // CHECK: util.return %[[COLLAPSED_SPLIT_COUNT_INTO_CONCAT_AXIS]] : tensor<1x2x3x24x5xf32>
+ util.return %0 : tensor<1x2x3x24x5xf32>
}
// -----
mesh.mesh @mesh_2d(shape = 2x2)
-// CHECK-LABEL: func @reduce_scatter_non_default_channel
-func.func @reduce_scatter_non_default_channel(
+// CHECK-LABEL: util.func public @reduce_scatter_non_default_channel
+ util.func public @reduce_scatter_non_default_channel(
// CHECK-SAME: %[[ARG:.*]]: tensor<3x2xi8>
%arg0 : tensor<3x2xi8>) -> tensor<3x1xi8> {
// CHECK-DAG: %[[CHANNEL:.*]] = util.global.load @_mesh_mesh_2d_axes_0 : !flow.channel
@@ -138,6 +138,6 @@
// CHECK-SAME: ins(%[[REDUCE_SCATTER_RES]] : tensor<1x3xi8>) outs(%[[RES_INIT_VAL]] : tensor<3x1xi8>) permutation = [1, 0]
%0 = mesh.reduce_scatter %arg0 on @mesh_2d mesh_axes = [0] scatter_axis = 1
: tensor<3x2xi8> -> tensor<3x1xi8>
- // CHECK: return %[[RES]] : tensor<3x1xi8>
- return %0 : tensor<3x1xi8>
+ // CHECK: util.return %[[RES]] : tensor<3x1xi8>
+ util.return %0 : tensor<3x1xi8>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/bitcast.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/bitcast.mlir
index c7a4c1b..49d4527 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/bitcast.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/bitcast.mlir
@@ -1,19 +1,19 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @static_tensor_bitcast(%arg0: tensor<4x4xf32>) -> tensor<4x4xi32> {
+ util.func public @static_tensor_bitcast(%arg0: tensor<4x4xf32>) -> tensor<4x4xi32> {
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.bitcast %arg0 : tensor<4x4xf32> -> tensor<4x4xi32>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.bitcast %arg0 : tensor<4x4xf32> to tensor<4x4xi32>
- return %0 : tensor<4x4xi32>
+ util.return %0 : tensor<4x4xi32>
}
// -----
-func.func @dynamic_tensor_bitcast(%arg0: tensor<?x?xf32>) -> tensor<?x?xi32> {
+ util.func public @dynamic_tensor_bitcast(%arg0: tensor<?x?xf32>) -> tensor<?x?xi32> {
// CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %c0 : tensor<?x?xf32>
// CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %c1 : tensor<?x?xf32>
// CHECK: %[[RESULT:.+]] = flow.tensor.bitcast %arg0 : tensor<?x?xf32>{%[[DIM0]], %[[DIM1]]} -> tensor<?x?xi32>{%[[DIM0]], %[[DIM1]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.bitcast %arg0 : tensor<?x?xf32> to tensor<?x?xi32>
- return %0 : tensor<?x?xi32>
+ util.return %0 : tensor<?x?xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/cast.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/cast.mlir
index 2e9aea5..3fbb254 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/cast.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/cast.mlir
@@ -1,38 +1,38 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @static_tensor_cast_to_dynamic(%arg0: tensor<4x4xf32>) -> tensor<?x?xf32> {
+ util.func public @static_tensor_cast_to_dynamic(%arg0: tensor<4x4xf32>) -> tensor<?x?xf32> {
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %arg0 : tensor<4x4xf32> -> tensor<?x?xf32>{%[[C4]], %[[C4]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
// -----
-func.func @dynamic_tensor_cast_to_static(%arg0: tensor<?xf32>) -> tensor<4xf32> {
+ util.func public @dynamic_tensor_cast_to_static(%arg0: tensor<?xf32>) -> tensor<4xf32> {
// CHECK: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %[[RESULT:.*]] = flow.tensor.reshape %arg0 : tensor<?xf32>{%[[C4]]} -> tensor<4xf32>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.cast %arg0 : tensor<?xf32> to tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
-func.func @dynamic_tensor_cast_to_dynamic(%arg0: tensor<?x?xf32>) -> tensor<?x3xf32> {
+ util.func public @dynamic_tensor_cast_to_dynamic(%arg0: tensor<?x?xf32>) -> tensor<?x3xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[D0:.*]] = tensor.dim %arg0, %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK: %[[RESULT:.*]] = flow.tensor.reshape %arg0 : tensor<?x?xf32>{%[[D0]], %[[C3]]} -> tensor<?x3xf32>{%[[D0]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.cast %arg0 : tensor<?x?xf32> to tensor<?x3xf32>
- return %0 : tensor<?x3xf32>
+ util.return %0 : tensor<?x3xf32>
}
// -----
-func.func @tensor_cast_within_dispatch_workgroups_not_converted() -> tensor<f32> {
+ util.func public @tensor_cast_within_dispatch_workgroups_not_converted() -> tensor<f32> {
%x = arith.constant 100 : index
%0 = flow.dispatch.workgroups[%x]() : () -> (tensor<f32>) = () {
// CHECK: = tensor.cast %[[source:.+]] : tensor<4x4xf32> to tensor<?x?xf32>
@@ -41,5 +41,5 @@
"test.sink"(%2) : (tensor<?x?xf32>) -> ()
flow.return
}
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract.mlir
index ed6ccce..34ff6f5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract.mlir
@@ -1,17 +1,17 @@
// RUN: iree-opt --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @tensor_extract(%arg0 : tensor<1xi32>, %arg1 : index) -> i32 {
+ util.func public @tensor_extract(%arg0 : tensor<1xi32>, %arg1 : index) -> i32 {
// CHECK: %[[RESULT:.*]] = flow.tensor.load %arg0[%arg1] : tensor<1xi32>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%extract = tensor.extract %arg0[%arg1] : tensor<1xi32>
- return %extract : i32
+ util.return %extract : i32
}
// -----
-func.func @tensor_extract_i1(%arg0 : tensor<1xi1>, %arg1 : index) -> i1 {
+ util.func public @tensor_extract_i1(%arg0 : tensor<1xi1>, %arg1 : index) -> i1 {
// CHECK: %[[RESULT:.*]] = flow.tensor.load %arg0[%arg1] : tensor<1xi1>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%extract = tensor.extract %arg0[%arg1] : tensor<1xi1>
- return %extract : i1
+ util.return %extract : i1
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract_slice.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract_slice.mlir
index 476cc58..8d1ea5e 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract_slice.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/extract_slice.mlir
@@ -1,11 +1,11 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @extract_slice1(%arg0 : tensor<5x24x48xf32>) -> tensor<4xf32> {
+ util.func public @extract_slice1(%arg0 : tensor<5x24x48xf32>) -> tensor<4xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 4] [1, 1, 4] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
-// CHECK-LABEL: func.func @extract_slice1(
+// CHECK-LABEL: util.func public @extract_slice1(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x24x48xf32>)
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
@@ -13,16 +13,16 @@
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[C3]], %[[C4]] for %[[C1]], %[[C1]], %[[C4]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @extract_slice2(%arg0 : tensor<5x24x48xf32>) -> tensor<2x48xf32> {
+ util.func public @extract_slice2(%arg0 : tensor<5x24x48xf32>) -> tensor<2x48xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 0] [1, 2, 48] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<2x48xf32>
- return %0 : tensor<2x48xf32>
+ util.return %0 : tensor<2x48xf32>
}
-// CHECK-LABEL: func.func @extract_slice2
+// CHECK-LABEL: util.func public @extract_slice2
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x24x48xf32>)
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -31,36 +31,36 @@
// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[C3]], %[[C0]] for %[[C1]], %[[C2]], %[[C48]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @extract_slice3(%arg0 : tensor<5x24x48xf32>) -> tensor<2x24xf32> {
+ util.func public @extract_slice3(%arg0 : tensor<5x24x48xf32>) -> tensor<2x24xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 0] [1, 2, 24] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<2x24xf32>
- return %0 : tensor<2x24xf32>
+ util.return %0 : tensor<2x24xf32>
}
-// CHECK-LABEL: func.func @extract_slice3
+// CHECK-LABEL: util.func public @extract_slice3
// CHECK: tensor.extract_slice
// -----
-func.func @extract_slice4(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<2x24xf32> {
+ util.func public @extract_slice4(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<2x24xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 0] [1, 2, 24] [1, %arg1, 1]
: tensor<5x24x48xf32> to tensor<2x24xf32>
- return %0 : tensor<2x24xf32>
+ util.return %0 : tensor<2x24xf32>
}
-// CHECK-LABEL: func.func @extract_slice4
+// CHECK-LABEL: util.func public @extract_slice4
// CHECK: tensor.extract_slice
// -----
-func.func @extract_slice5(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<2x48xf32> {
+ util.func public @extract_slice5(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<2x48xf32> {
%0 = tensor.extract_slice %arg0[2, %arg1, 0] [1, 2, 48] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<2x48xf32>
- return %0 : tensor<2x48xf32>
+ util.return %0 : tensor<2x48xf32>
}
-// CHECK-LABEL: func.func @extract_slice5(
+// CHECK-LABEL: util.func public @extract_slice5(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x24x48xf32>
// CHECK-SAME: %[[ARG1:.+]]: index)
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
@@ -69,16 +69,16 @@
// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[ARG1]], %[[C0]] for %[[C1]], %[[C2]], %[[C48]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @extract_slice6(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<?x48xf32> {
+ util.func public @extract_slice6(%arg0 : tensor<5x24x48xf32>, %arg1 : index) -> tensor<?x48xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 0] [1, %arg1, 48] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<?x48xf32>
- return %0 : tensor<?x48xf32>
+ util.return %0 : tensor<?x48xf32>
}
-// CHECK-LABEL: func.func @extract_slice6(
+// CHECK-LABEL: util.func public @extract_slice6(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x24x48xf32>
// CHECK-SAME: %[[ARG1:.+]]: index)
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
@@ -88,16 +88,16 @@
// CHECK-DAG: %[[C48:.+]] = arith.constant 48 : index
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[C3]], %[[C0]] for %[[C1]], %[[ARG1]], %[[C48]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @extract_slice7(%arg0 : tensor<5x?x48xf32>, %arg1 : index) -> tensor<2x48xf32> {
+ util.func public @extract_slice7(%arg0 : tensor<5x?x48xf32>, %arg1 : index) -> tensor<2x48xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 0] [1, 2, 48] [1, 1, 1]
: tensor<5x?x48xf32> to tensor<2x48xf32>
- return %0 : tensor<2x48xf32>
+ util.return %0 : tensor<2x48xf32>
}
-// CHECK-LABEL: func.func @extract_slice7(
+// CHECK-LABEL: util.func public @extract_slice7(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x?x48xf32>
// CHECK-SAME: %[[ARG1:.+]]: index)
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
@@ -108,15 +108,15 @@
// CHECK-DAG: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] : tensor<5x?x48xf32>
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[C3]], %[[C0]] for %[[C1]], %[[C2]], %[[C48]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @rank_reducing_extract_slice(%arg0: tensor<?x513xi32>) -> tensor<513xi32> {
+ util.func public @rank_reducing_extract_slice(%arg0: tensor<?x513xi32>) -> tensor<513xi32> {
%0 = tensor.extract_slice %arg0[4, 0] [1, 513] [1, 1] : tensor<?x513xi32> to tensor<513xi32>
- return %0 : tensor<513xi32>
+ util.return %0 : tensor<513xi32>
}
-// CHECK-LABEL: func.func @rank_reducing_extract_slice
+// CHECK-LABEL: util.func public @rank_reducing_extract_slice
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -127,16 +127,16 @@
// CHECK-SAME: [%[[C4]], %[[C0]] for %[[C1]], %[[C513]]]
// CHECK-SAME: : tensor<?x513xi32>{%[[DIM]]} -> tensor<1x513xi32>
// CHECK: %[[RESHAPE:.+]] = flow.tensor.reshape %[[SLICE]] : tensor<1x513xi32> -> tensor<513xi32>
-// CHECK: return %[[RESHAPE]] : tensor<513xi32>
+// CHECK: util.return %[[RESHAPE]] : tensor<513xi32>
// -----
-func.func @rank_reducing_extract_slice_trailing_unit_dims
+ util.func public @rank_reducing_extract_slice_trailing_unit_dims
(%arg0 : tensor<1x50x20x1xf32>) -> tensor<49x20xf32> {
%0 = tensor.extract_slice %arg0[0, 1, 0, 0] [1, 49, 20, 1] [1, 1, 1, 1] : tensor<1x50x20x1xf32> to tensor<49x20xf32>
- return %0 : tensor<49x20xf32>
+ util.return %0 : tensor<49x20xf32>
}
-// CHECK-LABEL: func.func @rank_reducing_extract_slice_trailing_unit_dims
+// CHECK-LABEL: util.func public @rank_reducing_extract_slice_trailing_unit_dims
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C49:.+]] = arith.constant 49 : index
@@ -146,7 +146,7 @@
// -----
-func.func @extract_slice_within_dispatch_workgroups_not_converted() -> tensor<f32> {
+ util.func public @extract_slice_within_dispatch_workgroups_not_converted() -> tensor<f32> {
%x = arith.constant 100 : index
%0 = flow.dispatch.workgroups[%x]() : () -> (tensor<f32>) = () {
// CHECK: = tensor.extract_slice %[[source:.+]][2, 3, 4] [1, 1, 4] [1, 1, 1] : tensor<5x24x48xf32> to tensor<4xf32>
@@ -156,5 +156,5 @@
"test.sink"(%2) : (tensor<4xf32>) -> ()
flow.return
}
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/fill.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/fill.mlir
index 18b49c8..2b50f17 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/fill.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/fill.mlir
@@ -1,17 +1,17 @@
// RUN: iree-opt --iree-flow-convert-to-flow --split-input-file %s | FileCheck %s
-func.func @tensor_reshape(%arg0 : tensor<?x4x?x5x?x6xf32>, %arg1 : tensor<20x?x40xf32>)
+ util.func public @tensor_reshape(%arg0 : tensor<?x4x?x5x?x6xf32>, %arg1 : tensor<20x?x40xf32>)
-> (tensor<?x5x?xf32>, tensor<5x4x?x4x2x4x5xf32>)
{
%0 = tensor.collapse_shape %arg0 [[0, 1, 2], [3], [4, 5]]
: tensor<?x4x?x5x?x6xf32> into tensor<?x5x?xf32>
%1 = tensor.expand_shape %arg1 [[0, 1], [2, 3], [4, 5, 6]]
: tensor<20x?x40xf32> into tensor<5x4x?x4x2x4x5xf32>
- return %0, %1 : tensor<?x5x?xf32>, tensor<5x4x?x4x2x4x5xf32>
+ util.return %0, %1 : tensor<?x5x?xf32>, tensor<5x4x?x4x2x4x5xf32>
}
-// CHECK-LABEL: func.func @tensor_reshape
+// CHECK-LABEL: util.func public @tensor_reshape
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x4x?x5x?x6xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<20x?x40xf32>
// CHECK-DAG: %[[R0:.+]] = flow.tensor.reshape %[[ARG0]]
// CHECK-DAG: %[[R1:.+]] = flow.tensor.reshape %[[ARG1]]
-// CHECK: return %[[R0]], %[[R1]]
+// CHECK: util.return %[[R0]], %[[R1]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/from_elements.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/from_elements.mlir
index f74dd18..a32e890 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/from_elements.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/from_elements.mlir
@@ -1,28 +1,28 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-// CHECK: func.func @tensor.from_elements__to__flow.tensor.splat(%[[arg0:.*]]: i8)
-func.func @tensor.from_elements__to__flow.tensor.splat(%arg0: i8) -> (i8) {
+// CHECK: util.func public @tensor.from_elements__to__flow.tensor.splat(%[[arg0:.*]]: i8)
+ util.func public @tensor.from_elements__to__flow.tensor.splat(%arg0: i8) -> (i8) {
// CHECK: %[[splat_res:.*]] = flow.tensor.splat %[[arg0]] : tensor<1xi8>
%0 = tensor.from_elements %arg0 : tensor<1xi8>
// CHECK: flow.tensor.load %[[splat_res]]
%1 = flow.tensor.load %0 : tensor<1xi8>
- return %1 : i8
+ util.return %1 : i8
}
// -----
-// CHECK: func.func @tensor.from_elements__not_convertible(%[[arg0:.*]]: i8)
-func.func @tensor.from_elements__not_convertible(%arg0: i8) -> (i8) {
+// CHECK: util.func public @tensor.from_elements__not_convertible(%[[arg0:.*]]: i8)
+ util.func public @tensor.from_elements__not_convertible(%arg0: i8) -> (i8) {
// CHECK: %[[c0:.*]] = arith.constant 0
%c0 = arith.constant 0 : index
// CHECK: %[[res:.*]] = tensor.from_elements %[[arg0]], %[[arg0]] : tensor<2xi8>
%0 = tensor.from_elements %arg0, %arg0 : tensor<2xi8>
// CHECK: flow.tensor.load %[[res]][%[[c0]]]
%1 = flow.tensor.load %0[%c0] : tensor<2xi8>
- return %1 : i8
+ util.return %1 : i8
}
// -----
-func.func @tensor.from_elements__within_dispatch_workgroups_not_converted() -> tensor<f32> {
+ util.func public @tensor.from_elements__within_dispatch_workgroups_not_converted() -> tensor<f32> {
%x = arith.constant 100 : index
%0 = flow.dispatch.workgroups[%x]() : () -> (tensor<f32>) = () {
// CHECK: = tensor.from_elements %[[source:.+]] : tensor<1xi8>
@@ -31,16 +31,16 @@
"test.sink"(%2) : (tensor<1xi8>) -> ()
flow.return
}
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// -----
-func.func @tensor.from_elements_0D(%arg0 : f32) -> tensor<f32> {
+ util.func public @tensor.from_elements_0D(%arg0 : f32) -> tensor<f32> {
%0 = tensor.from_elements %arg0 : tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
-// CHECK: func.func @tensor.from_elements_0D
+// CHECK: util.func public @tensor.from_elements_0D
// CHECK-SAME: %[[ARG0:.+]]: f32
// CHECK: %[[SPLAT:.+]] = flow.tensor.splat %[[ARG0]] : tensor<f32>
-// CHECK: return %[[SPLAT]]
+// CHECK: util.return %[[SPLAT]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert.mlir
index b63497c..a83fc1f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert.mlir
@@ -1,26 +1,26 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @insert_convert_zero_ranked_tensor
+ util.func public @insert_convert_zero_ranked_tensor
(%arg0 : tensor<i64>) -> tensor<i64> {
%c0_i64 = arith.constant 0 : i64
%0 = tensor.insert %c0_i64 into %arg0[] : tensor<i64>
- return %0 : tensor<i64>
+ util.return %0 : tensor<i64>
}
-// CHECK-LABEL: func.func @insert_convert_zero_ranked_tensor
+// CHECK-LABEL: util.func public @insert_convert_zero_ranked_tensor
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0_I64:.+]] = arith.constant 0 : i64
// CHECK: %[[UPDATE:.+]] = flow.tensor.store %[[C0_I64]], %[[ARG0]] : tensor<i64>
// -----
-func.func @insert_convert
+ util.func public @insert_convert
(%arg0 : tensor<2x3xi64>) -> tensor<2x3xi64> {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%0 = tensor.insert %c0_i64 into %arg0[%c0, %c0] : tensor<2x3xi64>
- return %0 : tensor<2x3xi64>
+ util.return %0 : tensor<2x3xi64>
}
-// CHECK-LABEL: func.func @insert_convert
+// CHECK-LABEL: util.func public @insert_convert
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C0_I64:.+]] = arith.constant 0 : i64
@@ -28,14 +28,14 @@
// -----
-func.func @insert_convert_dynamic_dims
+ util.func public @insert_convert_dynamic_dims
(%arg0 : tensor<?x3xi64>) -> tensor<?x3xi64> {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%0 = tensor.insert %c0_i64 into %arg0[%c0, %c0] : tensor<?x3xi64>
- return %0 : tensor<?x3xi64>
+ util.return %0 : tensor<?x3xi64>
}
-// CHECK-LABEL: func.func @insert_convert_dynamic_dims
+// CHECK-LABEL: util.func public @insert_convert_dynamic_dims
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C0_I64:.+]] = arith.constant 0 : i64
@@ -44,7 +44,7 @@
// -----
-func.func @insert_within_dispatch_workgroups_not_converted() -> tensor<f32> {
+ util.func public @insert_within_dispatch_workgroups_not_converted() -> tensor<f32> {
%x = arith.constant 100 : index
%0 = flow.dispatch.workgroups[%x]() : () -> (tensor<f32>) = () {
%c0 = arith.constant 0 : index
@@ -55,5 +55,5 @@
"test.sink"(%2) : (tensor<2x3xi64>) -> ()
flow.return
}
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert_slice.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert_slice.mlir
index 6f96112..662ab69 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert_slice.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/insert_slice.mlir
@@ -1,14 +1,14 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-convert-to-flow %s | FileCheck %s
-func.func @insert_slice_convert
+ util.func public @insert_slice_convert
(%arg0 : tensor<?x24x48xf32>, %arg1 : tensor<1x4x48xf32>) ->
tensor<?x24x48xf32> {
%c0 = arith.constant 0 : index
%0 = tensor.insert_slice %arg1 into %arg0[4, 2, 0] [1, 4, 48] [1, 1, 1] :
tensor<1x4x48xf32> into tensor<?x24x48xf32>
- return %0 : tensor<?x24x48xf32>
+ util.return %0 : tensor<?x24x48xf32>
}
-// CHECK-LABEL: func.func @insert_slice_convert
+// CHECK-LABEL: util.func public @insert_slice_convert
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
@@ -20,15 +20,15 @@
// -----
-func.func @insert_slice_convert_rank_reducing
+ util.func public @insert_slice_convert_rank_reducing
(%arg0 : tensor<?x24x48xf32>, %arg1 : tensor<4x48xf32>) ->
tensor<?x24x48xf32> {
%c0 = arith.constant 0 : index
%0 = tensor.insert_slice %arg1 into %arg0[4, 2, 0] [1, 4, 48] [1, 1, 1] :
tensor<4x48xf32> into tensor<?x24x48xf32>
- return %0 : tensor<?x24x48xf32>
+ util.return %0 : tensor<?x24x48xf32>
}
-// CHECK-LABEL: func.func @insert_slice_convert_rank_reducing
+// CHECK-LABEL: util.func public @insert_slice_convert_rank_reducing
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
@@ -41,12 +41,12 @@
// -----
-func.func @rank_reducing_insert_slice_trailing_unit_dims
+ util.func public @rank_reducing_insert_slice_trailing_unit_dims
(%arg0 : tensor<49x20xf32>, %arg1 : tensor<1x50x20x1xf32>) -> tensor<1x50x20x1xf32> {
%0 = tensor.insert_slice %arg0 into %arg1[0, 1, 0, 0] [1, 49, 20, 1] [1, 1, 1, 1] : tensor<49x20xf32> into tensor<1x50x20x1xf32>
- return %0 : tensor<1x50x20x1xf32>
+ util.return %0 : tensor<1x50x20x1xf32>
}
-// CHECK-LABEL: func.func @rank_reducing_insert_slice_trailing_unit_dims
+// CHECK-LABEL: util.func public @rank_reducing_insert_slice_trailing_unit_dims
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[RESHAPE:.+]] = flow.tensor.reshape %{{.+}} : tensor<49x20xf32> -> tensor<1x49x20x1xf32>
@@ -55,8 +55,8 @@
// -----
-// CHECK-LABEL: func.func @insert_slice_within_dispatch_workgroups_not_converted
-func.func @insert_slice_within_dispatch_workgroups_not_converted() -> tensor<f32> {
+// CHECK-LABEL: util.func public @insert_slice_within_dispatch_workgroups_not_converted
+ util.func public @insert_slice_within_dispatch_workgroups_not_converted() -> tensor<f32> {
%x = arith.constant 100 : index
%0 = flow.dispatch.workgroups[%x]() : () -> (tensor<f32>) = () {
// CHECK: = tensor.insert_slice %[[source2:.+]] into %[[source1:.+]][4, 2, 0] [1, 4, 48] [1, 1, 1] : tensor<1x4x48xf32> into tensor<?x24x48xf32>
@@ -67,19 +67,19 @@
"test.sink"(%3) : (tensor<?x24x48xf32>) -> ()
flow.return
}
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// -----
-func.func @insert_slice_convert_dynamic_offset_and_size
+ util.func public @insert_slice_convert_dynamic_offset_and_size
(%target: tensor<?x24x48xf32>, %slice: tensor<1x?x48xf32>, %offset: index, %size: index) ->
tensor<?x24x48xf32> {
%0 = tensor.insert_slice %slice into %target[%offset, 2, 0] [1, %size, 48] [1, 1, 1] :
tensor<1x?x48xf32> into tensor<?x24x48xf32>
- return %0 : tensor<?x24x48xf32>
+ util.return %0 : tensor<?x24x48xf32>
}
-// CHECK-LABEL: func.func @insert_slice_convert_dynamic_offset_and_size
+// CHECK-LABEL: util.func public @insert_slice_convert_dynamic_offset_and_size
// CHECK-SAME: %[[TARGET:[a-zA-Z0-9_]+]]
// CHECK-SAME: %[[SLICE:[a-zA-Z0-9_]+]]
// CHECK-SAME: %[[OFFSET:[a-zA-Z0-9_]+]]
@@ -92,8 +92,8 @@
// -----
-// CHECK-LABEL: func.func @insert_slice_dynamic_tensor_result_not_converted
-func.func @insert_slice_dynamic_tensor_result_not_converted
+// CHECK-LABEL: util.func public @insert_slice_dynamic_tensor_result_not_converted
+ util.func public @insert_slice_dynamic_tensor_result_not_converted
(%arg0: tensor<?x24x48xf32>, %arg1: tensor<1x4x48xf32>, %offset: index) ->
tensor<?x24x48xf32> {
%x = arith.constant 100 : index
@@ -106,6 +106,6 @@
// CHECK: %[[INSERTED_TENSOR:.+]] = tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 2, 0] [1, 4, 48] [1, 1, 1]
%2 = tensor.insert_slice %arg1 into %arg0[%idx, 2, 0] [1, 4, 48] [1, 1, 1] :
tensor<1x4x48xf32> into tensor<?x24x48xf32>
- // CHECK: return %[[INSERTED_TENSOR]] : tensor<?x24x48xf32>
- return %2 : tensor<?x24x48xf32>
+ // CHECK: util.return %[[INSERTED_TENSOR]] : tensor<?x24x48xf32>
+ util.return %2 : tensor<?x24x48xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/reshape.mlir b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/reshape.mlir
index cfc50ab..c85bd56 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/reshape.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow/test/reshape.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt --iree-flow-convert-to-flow --split-input-file %s | FileCheck %s
-func.func @turn_fill_into_splat(%arg0: tensor<?x?xf32>, %arg1: tensor<f32>, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> tensor<?x?xf32> {
+ util.func public @turn_fill_into_splat(%arg0: tensor<?x?xf32>, %arg1: tensor<f32>, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = tensor.extract %arg1[] : tensor<f32>
@@ -11,11 +11,11 @@
%5 = tensor.empty(%3, %4) : tensor<?x?xf32>
%6 = linalg.fill ins(%0 : f32) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
%7 = flow.tensor.update %arg0, %6[%arg2, %arg3] : tensor<?x?xf32>{%1, %2} -> %6 as tensor<?x?xf32>{%3, %4}
- return %7 : tensor<?x?xf32>
+ util.return %7 : tensor<?x?xf32>
}
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 + s1)>
-// CHECK: func.func @turn_fill_into_splat
+// CHECK: util.func public @turn_fill_into_splat
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
@@ -34,17 +34,17 @@
// -----
-func.func @static_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x8xf32> {
+ util.func public @static_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x8xf32> {
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %arg0 : tensor<2x4xf32> -> tensor<1x8xf32>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.reshape %arg0(%arg1)
: (tensor<2x4xf32>, tensor<2xindex>) -> tensor<1x8xf32>
- return %0 : tensor<1x8xf32> }
+ util.return %0 : tensor<1x8xf32> }
// -----
- func.func @dynamic_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<?x?xf32> {
- // CHECK: func.func @dynamic_tensor_reshape
+ util.func public @dynamic_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<?x?xf32> {
+ // CHECK: util.func public @dynamic_tensor_reshape
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x4xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2xindex>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
@@ -52,29 +52,29 @@
// CHECK-DAG: %[[VAL:.+]] = flow.tensor.load %[[ARG1]][%[[C0]]] : tensor<2xindex>
// CHECK-DAG: %[[VAL1:.+]] = flow.tensor.load %[[ARG1]][%[[C1]]] : tensor<2xindex>
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %[[ARG0]] : tensor<2x4xf32> -> tensor<?x?xf32>{%[[VAL]], %[[VAL1]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.reshape %arg0(%arg1)
: (tensor<2x4xf32>, tensor<2xindex>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32> }
+ util.return %0 : tensor<?x?xf32> }
// -----
- func.func @mix_dynamic_and_static_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x?xf32> {
- // CHECK: func.func @mix_dynamic_and_static_tensor_reshape
+ util.func public @mix_dynamic_and_static_tensor_reshape(%arg0: tensor<2x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x?xf32> {
+ // CHECK: util.func public @mix_dynamic_and_static_tensor_reshape
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x4xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2xindex>
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL:.+]] = flow.tensor.load %[[ARG1]][%[[C1]]] : tensor<2xindex>
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %[[ARG0]] : tensor<2x4xf32> -> tensor<1x?xf32>{%[[VAL]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.reshape %arg0(%arg1)
: (tensor<2x4xf32>, tensor<2xindex>) -> tensor<1x?xf32>
- return %0 : tensor<1x?xf32> }
+ util.return %0 : tensor<1x?xf32> }
// -----
- func.func @dynamic_input_and_output_tensor_reshape(%arg0: tensor<?x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x?xf32> {
- // CHECK: func.func @dynamic_input_and_output_tensor_reshape
+ util.func public @dynamic_input_and_output_tensor_reshape(%arg0: tensor<?x4xf32>, %arg1: tensor<2xindex>) -> tensor<1x?xf32> {
+ // CHECK: util.func public @dynamic_input_and_output_tensor_reshape
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x4xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2xindex>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
@@ -82,18 +82,18 @@
// CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x4xf32>
// CHECK-DAG: %[[VAL:.+]] = flow.tensor.load %[[ARG1]][%[[C1]]] : tensor<2xindex>
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %[[ARG0]] : tensor<?x4xf32>{%[[D0]]} -> tensor<1x?xf32>{%[[VAL]]}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.reshape %arg0(%arg1)
: (tensor<?x4xf32>, tensor<2xindex>) -> tensor<1x?xf32>
- return %0 : tensor<1x?xf32> }
+ util.return %0 : tensor<1x?xf32> }
// -----
- func.func @from_elements_test_reshape(%arg0: tensor<?x4xf32>, %arg1: index, %arg2: index) -> tensor<?x1xf32> {
+ util.func public @from_elements_test_reshape(%arg0: tensor<?x4xf32>, %arg1: index, %arg2: index) -> tensor<?x1xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[D1:.*]] = tensor.dim %arg0, %[[C0:.*]] : tensor<?x4xf32>
// CHECK-DAG: %[[RESULT:.*]] = flow.tensor.reshape %arg0 : tensor<?x4xf32>{%[[D1]]} -> tensor<?x1xf32>{%arg1}
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.from_elements %arg1, %arg2 : tensor<2xindex>
%1 = tensor.reshape %arg0(%0)
: (tensor<?x4xf32>, tensor<2xindex>) -> tensor<?x1xf32>
- return %1 : tensor<?x1xf32> }
+ util.return %1 : tensor<?x1xf32> }
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index 6266615..6ad3b9c 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -1510,8 +1510,10 @@
state.addAttribute("function_type", TypeAttr::get(type));
state.attributes.append(attrs.begin(), attrs.end());
state.attributes.erase(IREE::Util::TiedOpInterface::getStorageAttrName());
- state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
- tiedOperands);
+ if (tiedOperands) {
+ state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
+ tiedOperands);
+ }
state.addRegion();
if (!argAttrs.empty() || !resAttrs.empty()) {
assert(type.getNumInputs() == argAttrs.size());
@@ -1538,8 +1540,10 @@
state.addOperands(resultDims);
state.addAttributes(attributes);
state.attributes.erase(IREE::Util::TiedOpInterface::getStorageAttrName());
- state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
- tiedOperands);
+ if (tiedOperands) {
+ state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
+ tiedOperands);
+ }
state.attributes.erase(getOperandSegmentSizeAttr());
state.addAttribute(getOperandSegmentSizeAttr(),
builder.getDenseI32ArrayAttr({
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/call_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/call_ops.mlir
index 3bc7761..8f94447 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/call_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/call_ops.mlir
@@ -41,14 +41,14 @@
// CHECK-LABEL: @basicCall
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?xf32>)
-func.func @basicCall(%arg0: tensor<?xf32>) -> (tensor<?xf32>, i32) {
+util.func public @basicCall(%arg0: tensor<?xf32>) -> (tensor<?xf32>, i32) {
%c0 = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c0
%dim = tensor.dim %arg0, %c0 : tensor<?xf32>
// CHECK: %[[CALL:.+]]:2 = flow.call @basicExtern(%[[ARG0]], %[[DIM]]) : (tensor<?xf32>{%[[DIM]]}, index) -> (tensor<?xf32>{%[[DIM]]}, i32)
%call:2 = flow.call @basicExtern(%arg0, %dim) : (tensor<?xf32>{%dim}, index) -> (tensor<?xf32>{%dim}, i32)
- // CHECK: return %[[CALL]]#0, %[[CALL]]#1
- return %call#0, %call#1 : tensor<?xf32>, i32
+ // CHECK: util.return %[[CALL]]#0, %[[CALL]]#1
+ util.return %call#0, %call#1 : tensor<?xf32>, i32
}
// -----
@@ -59,14 +59,14 @@
// CHECK-LABEL: @inplaceCall
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?xf32>)
-func.func @inplaceCall(%arg0: tensor<?xf32>) -> tensor<?xf32> {
+util.func public @inplaceCall(%arg0: tensor<?xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c0
%dim = tensor.dim %arg0, %c0 : tensor<?xf32>
// CHECK: %[[CALL:.+]] = flow.call @inplaceExtern(%[[ARG0]], %[[DIM]]) : (tensor<?xf32>{%[[DIM]]}, index) -> %[[ARG0]]{%[[DIM]]}
%call = flow.call @inplaceExtern(%arg0, %dim) : (tensor<?xf32>{%dim}, index) -> %arg0{%dim}
- // CHECK: return %[[CALL]]
- return %call : tensor<?xf32>
+ // CHECK: util.return %[[CALL]]
+ util.return %call : tensor<?xf32>
}
// -----
@@ -77,12 +77,12 @@
// CHECK-LABEL: @inplaceTypeChangeCall
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x4xf32>)
-func.func @inplaceTypeChangeCall(%arg0: tensor<?x4xf32>) -> tensor<4x?xi32> {
+util.func public @inplaceTypeChangeCall(%arg0: tensor<?x4xf32>) -> tensor<4x?xi32> {
%c0 = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c0
%dim = tensor.dim %arg0, %c0 : tensor<?x4xf32>
// CHECK: %[[CALL:.+]] = flow.call @inplaceTypeChangeExtern(%[[ARG0]], %[[DIM]]) : (tensor<?x4xf32>{%[[DIM]]}, index) -> %[[ARG0]] as tensor<4x?xi32>{%[[DIM]]}
%call = flow.call @inplaceTypeChangeExtern(%arg0, %dim) : (tensor<?x4xf32>{%dim}, index) -> %arg0 as tensor<4x?xi32>{%dim}
- // CHECK: return %[[CALL]]
- return %call : tensor<4x?xi32>
+ // CHECK: util.return %[[CALL]]
+ util.return %call : tensor<4x?xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_ops.mlir
index 7d0138e..1c69b13 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_ops.mlir
@@ -3,19 +3,19 @@
flow.executable @ex0 {
flow.executable.export @dispatch_fn
builtin.module {
- func.func @dispatch_fn(%cst : index, %arg0 : tensor<4xf32>) -> tensor<4xf32> {
- return %arg0 : tensor<4xf32>
+ util.func public @dispatch_fn(%cst : index, %arg0 : tensor<4xf32>) -> tensor<4xf32> {
+ util.return %arg0 : tensor<4xf32>
}
}
}
// CHECK-LABEL: @dispatch
-func.func @dispatch(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
+util.func public @dispatch(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
// CHECK: %[[CST:.+]] = arith.constant
%cst = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @ex0::@dispatch_fn[%[[CST]]](%[[CST]], %arg0) : (index, tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @ex0::@dispatch_fn[%cst](%cst, %arg0) : (index, tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
@@ -26,10 +26,10 @@
}
// CHECK-LABEL: @dispatchWithMultipleRefs
-func.func @dispatchWithMultipleRefs(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+util.func public @dispatchWithMultipleRefs(%arg0: tensor<4xf32>) -> tensor<4xf32> {
// CHECK: = flow.dispatch {@ex0::@dispatch_a, @ex0::@dispatch_b}(%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch {@ex0::@dispatch_a, @ex0::@dispatch_b}(%arg0) : (tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
@@ -42,12 +42,12 @@
}
// CHECK-LABEL: @dispatchWithWorkgroupCount
-func.func @dispatchWithWorkgroupCount(%arg0: tensor<4xf32>, %arg1: index) -> tensor<4xf32> {
+util.func public @dispatchWithWorkgroupCount(%arg0: tensor<4xf32>, %arg1: index) -> tensor<4xf32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
// CHECK: = flow.dispatch @ex0::@dispatch[%c1, %c2](%arg0, %arg1) : (tensor<4xf32>, index) -> tensor<4xf32>
%0 = flow.dispatch @ex0::@dispatch[%c1, %c2](%arg0, %arg1) : (tensor<4xf32>, index) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
@@ -58,40 +58,40 @@
}
}
-func.func @dispatchWithInvalidWorkload(%arg0: tensor<4xf32>, %arg1: index) -> tensor<4xf32> {
+util.func public @dispatchWithInvalidWorkload(%arg0: tensor<4xf32>, %arg1: index) -> tensor<4xf32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
// expected-error @+1 {{op workload mismatch; entry point expects 1 arguments but dispatch provides 2}}
%0 = flow.dispatch @ex0::@dispatch[%c1, %c2](%arg0, %arg1) : (tensor<4xf32>, index) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
// CHECK-LABEL: @dispatchNoWorkload
-func.func @dispatchNoWorkload(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
+util.func public @dispatchNoWorkload(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
// CHECK: %[[CST:.+]] = arith.constant
%cst = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @ex0::@dispatch_fn(%[[CST]], %arg0) : (index, tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @ex0::@dispatch_fn(%cst, %arg0) : (index, tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
// CHECK-LABEL: @inplaceDispatch
-func.func @inplaceDispatch(%arg0 : tensor<4xf32>, %arg1 : tensor<8xf32>) -> (tensor<4xf32>, tensor<8xf32>) {
+util.func public @inplaceDispatch(%arg0 : tensor<4xf32>, %arg1 : tensor<8xf32>) -> (tensor<4xf32>, tensor<8xf32>) {
// CHECK: %[[CST:.+]] = arith.constant
%cst = arith.constant 4 : index
// CHECK: %0:2 = flow.dispatch @ex0::@dispatch_fn[%[[CST]]](%[[CST]], %arg0, %arg1) : (index, tensor<4xf32>, tensor<8xf32>) -> (%arg0, %arg1)
%0, %1 = flow.dispatch @ex0::@dispatch_fn[%cst](%cst, %arg0, %arg1) : (index, tensor<4xf32>, tensor<8xf32>) -> (%arg0, %arg1)
- return %0, %1 : tensor<4xf32>, tensor<8xf32>
+ util.return %0, %1 : tensor<4xf32>, tensor<8xf32>
}
// -----
// CHECK-LABEL: @inplaceDynamicDispatch
-func.func @inplaceDynamicDispatch(%arg0 : tensor<4x?xf32>, %arg1 : tensor<8x?xf32>) -> (tensor<4x?xf32>, tensor<8x?xf32>) {
+util.func public @inplaceDynamicDispatch(%arg0 : tensor<4x?xf32>, %arg1 : tensor<8x?xf32>) -> (tensor<4x?xf32>, tensor<8x?xf32>) {
// CHECK-DAG: %[[CST:.+]] = arith.constant 4
%cst = arith.constant 4 : index
// CHECK-DAG: %[[DIM0:.+]] = arith.constant 100
@@ -100,28 +100,28 @@
%dim1 = arith.constant 200 : index
// CHECK: %0:2 = flow.dispatch @ex0::@dispatch_fn[%[[CST]]](%[[CST]], %arg0, %arg1) : (index, tensor<4x?xf32>{%[[DIM0]]}, tensor<8x?xf32>{%[[DIM1]]}) -> (%arg0{%[[DIM1]]}, %arg1{%[[DIM0]]})
%0, %1 = flow.dispatch @ex0::@dispatch_fn[%cst](%cst, %arg0, %arg1) : (index, tensor<4x?xf32>{%dim0}, tensor<8x?xf32>{%dim1}) -> (%arg0{%dim1}, %arg1{%dim0})
- return %0, %1 : tensor<4x?xf32>, tensor<8x?xf32>
+ util.return %0, %1 : tensor<4x?xf32>, tensor<8x?xf32>
}
// -----
// CHECK-LABEL: @inplaceTypeChange
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?xf32>)
-func.func @inplaceTypeChange(%arg0: tensor<4x?xf32>) -> tensor<?x4xf32> {
+util.func public @inplaceTypeChange(%arg0: tensor<4x?xf32>) -> tensor<?x4xf32> {
// CHECK-DAG: %[[CST:.+]] = arith.constant 4
%cst = arith.constant 4 : index
// CHECK-DAG: %[[DIM0:.+]] = arith.constant 100
%dim0 = arith.constant 100 : index
// CHECK: %0 = flow.dispatch @ex0::@dispatch_fn[%[[CST]]](%[[ARG0]]) : (tensor<4x?xf32>{%[[DIM0]]}) -> %arg0 as tensor<?x4xf32>{%[[DIM0]]}
%0 = flow.dispatch @ex0::@dispatch_fn[%cst](%arg0) : (tensor<4x?xf32>{%dim0}) -> %arg0 as tensor<?x4xf32>{%dim0}
- return %0 : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @region
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>)
-func.func @region(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @region(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[R:.*]] = flow.dispatch.region -> (tensor<?x?xf32>{%{{.*}}, %{{.*}}}) {
// CHECK: flow.return %[[ARG0]] : tensor<?x?xf32>
// CHECK: }
@@ -132,30 +132,30 @@
%r = flow.dispatch.region -> (tensor<?x?xf32>{%d0, %d1}) {
flow.return %arg0 : tensor<?x?xf32>
}
- // CHECK: return %[[R]]
- return %r : tensor<?x?xf32>
+ // CHECK: util.return %[[R]]
+ util.return %r : tensor<?x?xf32>
}
// -----
// CHECK-LABEL: @regionStaticShape
// CHECK-SAME: (%[[ARG0:.+]]: tensor<5x10xf32>)
-func.func @regionStaticShape(%arg0: tensor<5x10xf32>) -> tensor<5x10xf32> {
+util.func public @regionStaticShape(%arg0: tensor<5x10xf32>) -> tensor<5x10xf32> {
// CHECK: %[[R:.*]] = flow.dispatch.region -> (tensor<5x10xf32>) {
// CHECK: flow.return %[[ARG0]] : tensor<5x10xf32>
// CHECK: }
%r = flow.dispatch.region -> (tensor<5x10xf32>) {
flow.return %arg0 : tensor<5x10xf32>
}
- // CHECK: return %[[R]]
- return %r : tensor<5x10xf32>
+ // CHECK: util.return %[[R]]
+ util.return %r : tensor<5x10xf32>
}
// -----
-// CHECK-LABEL: func.func @regionDynamicShape
+// CHECK-LABEL: util.func public @regionDynamicShape
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?x16xf32>, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[DIM2:.+]]: index, %[[DIM3:.+]]: index)
-func.func @regionDynamicShape(%arg0: tensor<?x?x16xf32>, %dim0: index, %dim1: index, %dim2: index, %dim3: index) -> tensor<?x?x16xf32> {
+util.func public @regionDynamicShape(%arg0: tensor<?x?x16xf32>, %dim0: index, %dim1: index, %dim2: index, %dim3: index) -> tensor<?x?x16xf32> {
// CHECK: %[[C16:.+]] = arith.constant 16 : index
%c16 = arith.constant 16 : index
// CHECK: %[[R:.+]] = flow.dispatch.region[%[[DIM0]], %[[DIM1]], %[[C16]]] -> (tensor<?x?x16xf32>{%[[DIM2]], %[[DIM3]]}) {
@@ -164,6 +164,6 @@
%region = flow.dispatch.region[%dim0, %dim1, %c16] -> (tensor<?x?x16xf32>{%dim2, %dim3}) {
flow.return %arg0 : tensor<?x?x16xf32>
}
- // CHECK: return %[[R]]
- return %region: tensor<?x?x16xf32>
+ // CHECK: util.return %[[R]]
+ util.return %region: tensor<?x?x16xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_tensor_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_tensor_folding.mlir
index 9d8a4f4..2927f3f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_tensor_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_tensor_folding.mlir
@@ -1,24 +1,24 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --canonicalize %s | FileCheck %s
// CHECK-LABEL: @ReuseDispatchTensorLoadShapeDims
-func.func @ReuseDispatchTensorLoadShapeDims(%arg0: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
+util.func public @ReuseDispatchTensorLoadShapeDims(%arg0: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
%arg0_tied = flow.dispatch.tie_shape %arg0 : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%arg1, %arg2}
%c0 = arith.constant 0 : index
// CHECK: flow.dispatch.tensor.load {{.+}} !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%arg1, %arg2}
%0 = flow.dispatch.tensor.load %arg0_tied, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%arg3, %arg4} -> tensor<256x1024xf32>
"test.sink"(%0) : (tensor<256x1024xf32>) -> ()
- return
+ util.return
}
// -----
-func.func @canonicalizeStaticOperands(%arg0: !flow.dispatch.tensor<readonly:tensor<4x4xf32>>) {
+util.func public @canonicalizeStaticOperands(%arg0: !flow.dispatch.tensor<readonly:tensor<4x4xf32>>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = flow.dispatch.tensor.load %arg0, offsets=[%c0, %c0], sizes=[%c2, %c2], strides=[%c1, %c1] : !flow.dispatch.tensor<readonly:tensor<4x4xf32>> -> tensor<?x?xf32>
"test.sink"(%0) : (tensor<?x?xf32>) -> ()
- return
+ util.return
}
// CHECK: @canonicalizeStaticOperand
@@ -31,13 +31,13 @@
// -----
-func.func @canonicalizePartiallyStaticOperands(%arg0: !flow.dispatch.tensor<readonly:tensor<4x4xf32>>, %offset: index, %size: index, %stride: index) {
+util.func public @canonicalizePartiallyStaticOperands(%arg0: !flow.dispatch.tensor<readonly:tensor<4x4xf32>>, %offset: index, %size: index, %stride: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = flow.dispatch.tensor.load %arg0, offsets=[%offset, %c0], sizes=[%size, %c2], strides=[%stride, %c1] : !flow.dispatch.tensor<readonly:tensor<4x4xf32>> -> tensor<?x?xf32>
"test.sink"(%0) : (tensor<?x?xf32>) -> ()
- return
+ util.return
}
// CHECK: @canonicalizePartiallyStaticOperands
@@ -51,7 +51,7 @@
// -----
-func.func @canonicalizeDispatchLoad(%arg0: !flow.dispatch.tensor<readonly:tensor<3x4x1x12x64xf32>>, %arg1 : index, %arg2: index, %arg3 : index) {
+util.func public @canonicalizeDispatchLoad(%arg0: !flow.dispatch.tensor<readonly:tensor<3x4x1x12x64xf32>>, %arg1 : index, %arg2: index, %arg3 : index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = flow.dispatch.tensor.load %arg0, offsets = [%arg1, %c0, 0, %arg2, %arg3], sizes = [1, 4, 1, 4, 32], strides = [%c1, %c1, 1, %c1, %c1] : !flow.dispatch.tensor<readonly:tensor<3x4x1x12x64xf32>> -> tensor<1x4x?x32xf32>
@@ -67,13 +67,13 @@
// -----
-func.func @canonicalizeDimOfTensorTile(%arg0: !flow.dispatch.tensor<readonly:tensor<250x1024xf32>>, %arg1 : index, %arg2: index) {
+util.func public @canonicalizeDimOfTensorTile(%arg0: !flow.dispatch.tensor<readonly:tensor<250x1024xf32>>, %arg1 : index, %arg2: index) {
%c0 = arith.constant 0 : index
%0 = affine.min affine_map<(d0) -> (64, -d0 + 250)>(%arg1)
%1 = flow.dispatch.tensor.load %arg0, offsets = [%arg2, 0], sizes = [%0, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<250x1024xf32>> -> tensor<?x1024xf32>
%2 = tensor.dim %1, %c0 : tensor<?x1024xf32>
"test.sink"(%2) : (index) -> ()
- return
+ util.return
}
// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (-s0 + 250, 64)>
@@ -84,15 +84,15 @@
// -----
-func.func @foldCastIntoStore(%arg0: !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>,
+util.func public @foldCastIntoStore(%arg0: !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>,
%arg1 : tensor<3x?xf32>, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index) {
%c3 = arith.constant 3 : index
%0 = tensor.cast %arg1 : tensor<3x?xf32> to tensor<?x?xf32>
flow.dispatch.tensor.store %0, %arg0, offsets = [3, 4, 5], sizes = [%c3, 1, %arg2], strides = [1, 1, 1]
: tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%arg3, %arg4, %arg5}
- return
+ util.return
}
-// CHECK: func @foldCastIntoStore
+// CHECK: util.func public @foldCastIntoStore
// CHECK-SAME: %[[ARG0:.+]]: !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>
// CHECK-SAME: %[[ARG1:.+]]: tensor<3x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups.mlir
index 35bafd1..2ff07fe 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file %s | iree-opt --allow-unregistered-dialect --split-input-file | FileCheck %s
// CHECK-LABEL: @complexWorkgroupsUsage
-func.func @complexWorkgroupsUsage(
+util.func public @complexWorkgroupsUsage(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x4xf32>
%arg0 : tensor<?x4xf32>,
// CHECK-SAME: %[[ARG1:.+]]: index
@@ -56,14 +56,14 @@
// CHECK-NEXT: flow.return
flow.return
}
- // CHECK: return %[[OUTER_RET0]] : tensor<4x?xf32>
- return %0 : tensor<4x?xf32>
+ // CHECK: util.return %[[OUTER_RET0]] : tensor<4x?xf32>
+ util.return %0 : tensor<4x?xf32>
}
// -----
// CHECK-LABEL: @inplaceDispatch
-func.func @inplaceDispatch(
+util.func public @inplaceDispatch(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x4xf32>
%arg0: tensor<?x4xf32>,
// CHECK-SAME: %[[ARG1:.+]]: index
@@ -88,15 +88,15 @@
// CHECK-NEXT: flow.return
flow.return
}
- // CHECK: return %[[OUTER_RET0]] : tensor<?x4xf32>
- return %0 : tensor<?x4xf32>
+ // CHECK: util.return %[[OUTER_RET0]] : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @dispatchWithCountRegion
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4xi32>)
-func.func @dispatchWithCountRegion(%arg0: tensor<4xi32>) -> tensor<4xi32> {
+util.func public @dispatchWithCountRegion(%arg0: tensor<4xi32>) -> tensor<4xi32> {
// CHECK-DAG: %[[WORKGROUP_COUNT_X:.+]] = arith.constant 100
%x = arith.constant 100 : index
// CHECK-DAG: %[[WORKGROUP_COUNT_Y:.+]] = arith.constant 50
@@ -117,6 +117,6 @@
// CHECK-NEXT: flow.return %[[X_CAPTURE]], %[[Y_CAPTURE]], %[[Z]]
flow.return %x_capture, %y_capture, %z : index, index, index
}
- // CHECK: return %[[OUTER_RET0]] : tensor<4xi32>
- return %0 : tensor<4xi32>
+ // CHECK: util.return %[[OUTER_RET0]] : tensor<4xi32>
+ util.return %0 : tensor<4xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
index 3ec11fb..6b1c799 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/dispatch_workgroups_folding.mlir
@@ -1,8 +1,8 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --canonicalize --cse %s | iree-opt --allow-unregistered-dialect --split-input-file | FileCheck %s
-// CHECK-LABEL: func.func @dontInlineReadWrite
+// CHECK-LABEL: util.func public @dontInlineReadWrite
// CHECK-SAME: (%[[ARG0:.+]]: tensor<1x4xf32>)
-func.func @dontInlineReadWrite(%arg0: tensor<1x4xf32>) -> tensor<4x8xf32> {
+util.func public @dontInlineReadWrite(%arg0: tensor<1x4xf32>) -> tensor<4x8xf32> {
// CHECK: %[[CST:.+]] = arith.constant dense<0.000000e+00> : tensor<4x8xf32>
%cst = arith.constant dense<0.0> : tensor<4x8xf32>
%x = arith.constant 100 : index
@@ -19,13 +19,13 @@
flow.dispatch.tensor.store %0, %arg1_capture, offsets=[0, 0], sizes=[4, 8], strides=[1, 1] : tensor<4x8xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x8xf32>>
flow.return
}
- return %0 : tensor<4x8xf32>
+ util.return %0 : tensor<4x8xf32>
}
// -----
-// CHECK-LABEL: func.func @remove_unused_result
-func.func @remove_unused_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
+// CHECK-LABEL: util.func public @remove_unused_result
+util.func public @remove_unused_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1]() : () -> tensor<i32> =
// CHECK-NEXT: (%{{.+}}: !flow.dispatch.tensor<writeonly:tensor<i32>>)
@@ -44,13 +44,13 @@
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:tensor<i32>>
flow.return
}
- return %0#0 : tensor<i32>
+ util.return %0#0 : tensor<i32>
}
// -----
-// CHECK-LABEL: func.func @remove_unused_dynamic_result
-func.func @remove_unused_dynamic_result(%dim: index) -> (tensor<i32>) {
+// CHECK-LABEL: util.func public @remove_unused_dynamic_result
+util.func public @remove_unused_dynamic_result(%dim: index) -> (tensor<i32>) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1]() : () -> tensor<i32> =
// CHECK-NEXT: (%{{.+}}: !flow.dispatch.tensor<writeonly:tensor<i32>>)
@@ -73,13 +73,13 @@
flow.dispatch.tensor.store %ret1_value, %ret1_shaped, offsets = [0], sizes = [%dim], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%dim}
flow.return
}
- return %0#0 : tensor<i32>
+ util.return %0#0 : tensor<i32>
}
// -----
-// CHECK-LABEL: func.func @remove_unused_read_write_result
-func.func @remove_unused_read_write_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
+// CHECK-LABEL: util.func public @remove_unused_read_write_result
+util.func public @remove_unused_read_write_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1]() : () -> tensor<i32> =
// CHECK-NEXT: (%{{.+}}: !flow.dispatch.tensor<writeonly:tensor<i32>>)
@@ -98,13 +98,13 @@
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<readwrite:tensor<i32>>
flow.return
}
- return %0#0 : tensor<i32>
+ util.return %0#0 : tensor<i32>
}
// -----
-// CHECK-LABEL: func.func @keep_used_read_write_result
-func.func @keep_used_read_write_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
+// CHECK-LABEL: util.func public @keep_used_read_write_result
+util.func public @keep_used_read_write_result(%arg0 : tensor<9xi32>, %arg1 : tensor<9xi32>) -> (tensor<i32>) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1]() : () -> (tensor<i32>, tensor<i32>) =
// CHECK-NEXT: (%{{.+}}: !flow.dispatch.tensor<writeonly:tensor<i32>>, %{{.+}}: !flow.dispatch.tensor<readwrite:tensor<i32>>)
@@ -121,13 +121,13 @@
flow.dispatch.tensor.store %4, %arg3, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<readwrite:tensor<i32>>
flow.return
}
- return %0#0 : tensor<i32>
+ util.return %0#0 : tensor<i32>
}
// -----
-// CHECK-LABEL: func.func @drop_unused_dispatch_region_result
-func.func @drop_unused_dispatch_region_result(
+// CHECK-LABEL: util.func public @drop_unused_dispatch_region_result
+util.func public @drop_unused_dispatch_region_result(
%arg0: tensor<?x?xf32>, %arg1: tensor<5x10xf32>, %arg2: tensor<7x11xf32>)
-> tensor<?x?xf32>
{
@@ -144,14 +144,14 @@
%1 = tensor.insert_slice %arg2 into %0[9, 10][7, 11][1, 1] : tensor<7x11xf32> into tensor<?x?xf32>
flow.return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
}
- // CHECK: return %[[r]]
- return %r#0 : tensor<?x?xf32>
+ // CHECK: util.return %[[r]]
+ util.return %r#0 : tensor<?x?xf32>
}
// -----
-// CHECK-LABEL: func @bubble_up_ordinal_ops(
-func.func @bubble_up_ordinal_ops(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
+// CHECK-LABEL: util.func public @bubble_up_ordinal_ops(
+util.func public @bubble_up_ordinal_ops(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
%result = flow.dispatch.workgroups[%arg0, %arg1](%arg0, %arg1) : (index, index) -> (tensor<?x?xf32>{%arg0, %arg1}) =
(%b0 : index, %b1 : index, %b2 : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>) {
// CHECK: flow.dispatch.workgroups
@@ -174,13 +174,13 @@
: tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%wl0, %wl1}
flow.return
}
- return %result : tensor<?x?xf32>
+ util.return %result : tensor<?x?xf32>
}
// -----
-// CHECK-LABEL: func @dedup_workgroup_count_from_slice_operands(
-func.func @dedup_workgroup_count_from_slice_operands(
+// CHECK-LABEL: util.func public @dedup_workgroup_count_from_slice_operands(
+util.func public @dedup_workgroup_count_from_slice_operands(
%arg0 : index, %arg1 : index, %arg2 : index) -> tensor<?x?x?x?x?xf32> {
%result = flow.dispatch.workgroups [%arg0, %arg1, %arg2](%arg0, %arg1, %arg2)
: (index, index, index) -> tensor<?x?x?x?x?xf32>{%arg0, %arg1, %arg2, %arg2, %arg0} =
@@ -211,16 +211,16 @@
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %b0, %b1, %b2, %b2, %b0
flow.return %x, %y, %z : index, index, index
}
- return %result :tensor<?x?x?x?x?xf32>
+ util.return %result :tensor<?x?x?x?x?xf32>
}
// -----
-// CHECK-LABEL: func @dedup_workload(
+// CHECK-LABEL: util.func public @dedup_workload(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index)
-func.func @dedup_workload(
+util.func public @dedup_workload(
%arg0 : index, %arg1 : index, %arg2 : index) -> tensor<?x?x?x?x?xf32> {
%result = flow.dispatch.workgroups [%arg0, %arg1, %arg2, %arg2, %arg0](%arg0, %arg1, %arg2)
: (index, index, index) -> tensor<?x?x?x?x?xf32>{%arg0, %arg1, %arg2, %arg2, %arg0} =
@@ -251,5 +251,5 @@
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %b0, %b1, %b2, %b3, %b4
flow.return %x, %y, %z : index, index, index
}
- return %result :tensor<?x?x?x?x?xf32>
+ util.return %result :tensor<?x?x?x?x?xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/resolve_dim_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/resolve_dim_ops.mlir
index a7321c3..25cffde 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/resolve_dim_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/resolve_dim_ops.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt -resolve-ranked-shaped-type-result-dims -split-input-file %s | FileCheck %s
-func.func @tensor_load_op() -> (index, index) {
+util.func public @tensor_load_op() -> (index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = hal.interface.constant.load[0] : index
@@ -11,9 +11,9 @@
: !flow.dispatch.tensor<readonly:tensor<?x1x1x?xf32>>{%0, %1} -> tensor<?x?xf32>
%4 = tensor.dim %3, %c0 : tensor<?x?xf32>
%5 = tensor.dim %3, %c1 : tensor<?x?xf32>
- return %4, %5 : index, index
+ util.return %4, %5 : index, index
}
-// CHECK-LABEL: func @tensor_load_op()
+// CHECK-LABEL: util.func public @tensor_load_op()
// CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load[0]
// CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load[1]
-// CHECK: return %[[D0]], %[[D1]]
+// CHECK: util.return %[[D0]], %[[D1]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
index 57a972e..a07f17c 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --canonicalize %s | FileCheck %s
// CHECK-LABEL: @expandStaticShapeConstant
-func.func @expandStaticShapeConstant() -> (tensor<2x4xi32>, index, index) {
+util.func public @expandStaticShapeConstant() -> (tensor<2x4xi32>, index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[CST:.+]] = arith.constant dense<2> : tensor<2x4xi32>
@@ -10,14 +10,14 @@
%d0 = tensor.dim %0, %c0 : tensor<2x4xi32>
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
%d1 = tensor.dim %0, %c1 : tensor<2x4xi32>
- // CHECK: return %[[CST]], %[[C2]], %[[C4]]
- return %0, %d0, %d1 : tensor<2x4xi32>, index, index
+ // CHECK: util.return %[[CST]], %[[C2]], %[[C4]]
+ util.return %0, %d0, %d1 : tensor<2x4xi32>, index, index
}
// -----
// CHECK-LABEL: @expandDynamicShapeConstant
-func.func @expandDynamicShapeConstant() -> (tensor<?x?xi32>, index, index) {
+util.func public @expandDynamicShapeConstant() -> (tensor<?x?xi32>, index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[CST:.+]] = arith.constant dense<2> : tensor<2x4xi32>
@@ -29,93 +29,93 @@
%0 = flow.tensor.constant dense<2> : tensor<2x4xi32> -> tensor<?x?xi32>
%d0 = tensor.dim %0, %c0 : tensor<?x?xi32>
%d1 = tensor.dim %0, %c1 : tensor<?x?xi32>
- // CHECK: return %[[T]], %[[D0]], %[[D1]]
- return %0, %d0, %d1 : tensor<?x?xi32>, index, index
+ // CHECK: util.return %[[T]], %[[D0]], %[[D1]]
+ util.return %0, %d0, %d1 : tensor<?x?xi32>, index, index
}
// -----
// CHECK-LABEL: @tieShapeStaticZeroElements
-func.func @tieShapeStaticZeroElements(%arg0: tensor<0xi32>) -> tensor<0xi32> {
+util.func public @tieShapeStaticZeroElements(%arg0: tensor<0xi32>) -> tensor<0xi32> {
// CHECK-NOT: flow.tensor.tie_shape
%0 = flow.tensor.tie_shape %arg0 : tensor<0xi32>
- // CHECK: return %arg0
- return %0 : tensor<0xi32>
+ // CHECK: util.return %arg0
+ util.return %0 : tensor<0xi32>
}
// -----
// CHECK-LABEL: @tieShapeDynamicZeroElements
// CHECK-SAME: (%[[OPERAND:.+]]: tensor<0x?xi32>, %[[DIM:.+]]: index)
-func.func @tieShapeDynamicZeroElements(%arg0: tensor<0x?xi32>, %dim: index) -> tensor<0x?xi32> {
+util.func public @tieShapeDynamicZeroElements(%arg0: tensor<0x?xi32>, %dim: index) -> tensor<0x?xi32> {
// CHECK-NOT: flow.tensor.tie_shape
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<0x?xi32>{%[[DIM]]}
%0 = flow.tensor.tie_shape %arg0 : tensor<0x?xi32>{%dim}
- // CHECK: return %[[RET]]
- return %0 : tensor<0x?xi32>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : tensor<0x?xi32>
}
// -----
// CHECK-LABEL: @reshapeNoOpScalar
-func.func @reshapeNoOpScalar(%arg0: tensor<f32>) -> tensor<f32> {
- // CHECK-NEXT: return %arg0 : tensor<f32>
+util.func public @reshapeNoOpScalar(%arg0: tensor<f32>) -> tensor<f32> {
+ // CHECK-NEXT: util.return %arg0 : tensor<f32>
%0 = flow.tensor.reshape %arg0 : tensor<f32> -> tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// -----
// CHECK-LABEL: @reshapeNoOpStatic
-func.func @reshapeNoOpStatic(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
- // CHECK-NEXT: return %arg0 : tensor<4x4xf32>
+util.func public @reshapeNoOpStatic(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+ // CHECK-NEXT: util.return %arg0 : tensor<4x4xf32>
%0 = flow.tensor.reshape %arg0 : tensor<4x4xf32> -> tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
+ util.return %0 : tensor<4x4xf32>
}
// -----
// CHECK-LABEL: @bitcastSameBitWidth
-func.func @bitcastSameBitWidth(%arg0: tensor<f32>) -> tensor<i32> {
+util.func public @bitcastSameBitWidth(%arg0: tensor<f32>) -> tensor<i32> {
// CHECK-NEXT: flow.tensor.bitcast %arg0
%0 = flow.tensor.bitcast %arg0 : tensor<f32> -> tensor<i32>
- return %0 : tensor<i32>
+ util.return %0 : tensor<i32>
}
// -----
// CHECK-LABEL: @reshapeRankDifferent
-func.func @reshapeRankDifferent(%arg0: tensor<1xf32>) -> tensor<f32> {
+util.func public @reshapeRankDifferent(%arg0: tensor<1xf32>) -> tensor<f32> {
// CHECK-NEXT: flow.tensor.reshape %arg0
%0 = flow.tensor.reshape %arg0 : tensor<1xf32> -> tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// -----
// CHECK-LABEL: @reshapeStaticDifferent
-func.func @reshapeStaticDifferent(%arg0: tensor<1x4xf32>) -> tensor<4x1xf32> {
+util.func public @reshapeStaticDifferent(%arg0: tensor<1x4xf32>) -> tensor<4x1xf32> {
// CHECK-NEXT: flow.tensor.reshape %arg0
%0 = flow.tensor.reshape %arg0 : tensor<1x4xf32> -> tensor<4x1xf32>
- return %0 : tensor<4x1xf32>
+ util.return %0 : tensor<4x1xf32>
}
// -----
// CHECK-LABEL: @reshapeNoOpDynamic
-func.func @reshapeNoOpDynamic(%arg0: tensor<4x?xf32>, %dim: index) -> tensor<4x?xf32> {
- // CHECK-NEXT: return %arg0 : tensor<4x?xf32>
+util.func public @reshapeNoOpDynamic(%arg0: tensor<4x?xf32>, %dim: index) -> tensor<4x?xf32> {
+ // CHECK-NEXT: util.return %arg0 : tensor<4x?xf32>
%0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim} -> tensor<4x?xf32>{%dim}
- return %0 : tensor<4x?xf32>
+ util.return %0 : tensor<4x?xf32>
}
// -----
// CHECK-LABEL: @reshapeDynamicDifferent
-func.func @reshapeDynamicDifferent(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index) -> tensor<4x?xf32> {
+util.func public @reshapeDynamicDifferent(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index) -> tensor<4x?xf32> {
// CHECK-NEXT: flow.tensor.reshape %arg0
%0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<4x?xf32>{%dim1}
- return %0 : tensor<4x?xf32>
+ util.return %0 : tensor<4x?xf32>
}
// -----
@@ -123,12 +123,12 @@
// CHECK-LABEL: @flattenReshapeChain
// CHECK-SAME: %[[ARG:.+]]: tensor<4x?xf32>,
// CHECK-SAME: %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[DIM2:.+]]: index
-func.func @flattenReshapeChain(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index, %dim2: index) -> tensor<4x?xf32> {
+util.func public @flattenReshapeChain(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index, %dim2: index) -> tensor<4x?xf32> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.reshape %[[ARG]] : tensor<4x?xf32>{%[[DIM0]]} -> tensor<4x?xf32>{%[[DIM2]]}
%0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<4x?xf32>{%dim1}
%1 = flow.tensor.reshape %0 : tensor<4x?xf32>{%dim1} -> tensor<4x?xf32>{%dim2}
- // CHECK-NEXT: return %[[RET]]
- return %1 : tensor<4x?xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %1 : tensor<4x?xf32>
}
// -----
@@ -136,12 +136,12 @@
// CHECK-LABEL: @flattenReshapeBitcastChain
// CHECK-SAME: %[[ARG:.+]]: tensor<4x?xi16>,
// CHECK-SAME: %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[DIM2:.+]]: index
-func.func @flattenReshapeBitcastChain(%arg0: tensor<4x?xi16>, %dim0: index, %dim1: index, %dim2: index) -> tensor<4x?xbf16> {
+util.func public @flattenReshapeBitcastChain(%arg0: tensor<4x?xi16>, %dim0: index, %dim1: index, %dim2: index) -> tensor<4x?xbf16> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.bitcast %[[ARG]] : tensor<4x?xi16>{%[[DIM0]]} -> tensor<4x?xbf16>{%[[DIM2]]}
%0 = flow.tensor.bitcast %arg0 : tensor<4x?xi16>{%dim0} -> tensor<4x?xf16>{%dim1}
%1 = flow.tensor.bitcast %0 : tensor<4x?xf16>{%dim1} -> tensor<4x?xbf16>{%dim2}
- // CHECK-NEXT: return %[[RET]]
- return %1 : tensor<4x?xbf16>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %1 : tensor<4x?xbf16>
}
// -----
@@ -149,12 +149,12 @@
// CHECK-LABEL: @flattenBitCastChain
// CHECK-SAME: %[[ARG:.+]]: tensor<?x4xi16>,
// CHECK-SAME: %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[DIM2:.+]]: index
-func.func @flattenBitCastChain(%arg0: tensor<?x4xi16>, %dim0: index, %dim1: index, %dim2: index) -> tensor<?x8xi8> {
+util.func public @flattenBitCastChain(%arg0: tensor<?x4xi16>, %dim0: index, %dim1: index, %dim2: index) -> tensor<?x8xi8> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.bitcast %[[ARG]] : tensor<?x4xi16>{%[[DIM0]]} -> tensor<?x8xi8>{%[[DIM2]]}
%0 = flow.tensor.bitcast %arg0 : tensor<?x4xi16>{%dim0} -> tensor<?x2xi32>{%dim1}
%1 = flow.tensor.bitcast %0 : tensor<?x2xi32>{%dim1} -> tensor<?x8xi8>{%dim2}
- // CHECK-NEXT: return %[[RET]]
- return %1 : tensor<?x8xi8>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %1 : tensor<?x8xi8>
}
// -----
@@ -162,13 +162,13 @@
// CHECK-LABEL: @flattenBitCastReshapeBitCast
// CHECK-SAME: %[[ARG:.+]]: tensor<?x16xi16>,
// CHECK-SAME: %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[DIM2:.+]]: index, %[[DIM3:.+]]: index
-func.func @flattenBitCastReshapeBitCast(%arg0: tensor<?x16xi16>, %dim0: index, %dim1: index, %dim2: index, %dim3: index) -> tensor<?x4x4xi16> {
+util.func public @flattenBitCastReshapeBitCast(%arg0: tensor<?x16xi16>, %dim0: index, %dim1: index, %dim2: index, %dim3: index) -> tensor<?x4x4xi16> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.reshape %[[ARG]] : tensor<?x16xi16>{%[[DIM0]]} -> tensor<?x4x4xi16>{%[[DIM3]]}
%0 = flow.tensor.bitcast %arg0 : tensor<?x16xi16>{%dim0} -> tensor<?x8xi32>{%dim1}
%1 = flow.tensor.reshape %0 : tensor<?x8xi32>{%dim1} -> tensor<?x4x2xi32>{%dim2}
%2 = flow.tensor.bitcast %1 : tensor<?x4x2xi32>{%dim2} -> tensor<?x4x4xi16>{%dim3}
- // CHECK-NEXT: return %[[RET]]
- return %2 : tensor<?x4x4xi16>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %2 : tensor<?x4x4xi16>
}
@@ -176,88 +176,88 @@
// CHECK-LABEL: @reshapeFromStaticZeroElements
// CHECK-SAME: (%[[OPERAND:.+]]: tensor<4x0xf32>, %[[DIM:.+]]: index)
-func.func @reshapeFromStaticZeroElements(%arg0: tensor<4x0xf32>, %dim: index) -> tensor<4x?xf32> {
+util.func public @reshapeFromStaticZeroElements(%arg0: tensor<4x0xf32>, %dim: index) -> tensor<4x?xf32> {
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<4x?xf32>{%[[DIM]]}
%0 = flow.tensor.reshape %arg0 : tensor<4x0xf32> -> tensor<4x?xf32>{%dim}
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<4x?xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<4x?xf32>
}
// -----
// CHECK-LABEL: @reshapeFromDynamicZeroElements
// CHECK-SAME: (%[[OPERAND:.+]]: tensor<0x?xf32>, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
-func.func @reshapeFromDynamicZeroElements(%arg0: tensor<0x?xf32>, %dim0: index, %dim1: index) -> tensor<4x?xf32> {
+util.func public @reshapeFromDynamicZeroElements(%arg0: tensor<0x?xf32>, %dim0: index, %dim1: index) -> tensor<4x?xf32> {
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<4x?xf32>{%[[DIM1]]}
%0 = flow.tensor.reshape %arg0 : tensor<0x?xf32>{%dim0} -> tensor<4x?xf32>{%dim1}
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<4x?xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<4x?xf32>
}
// -----
// CHECK-LABEL: @reshapeToStaticZeroElements
-func.func @reshapeToStaticZeroElements(%arg0: tensor<4x?xf32>, %dim0: index) {
+util.func public @reshapeToStaticZeroElements(%arg0: tensor<4x?xf32>, %dim0: index) {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<4x0xf32>
%0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<4x0xf32>
// CHECK-NEXT: util.optimization_barrier %[[RET]]
util.optimization_barrier %0 : tensor<4x0xf32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @reshapeToDynamicZeroElements
// CHECK-SAME: (%[[OPERAND:.+]]: tensor<4x?xf32>, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
-func.func @reshapeToDynamicZeroElements(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index) {
+util.func public @reshapeToDynamicZeroElements(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index) {
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<0x?xf32>{%[[DIM1]]}
%0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<0x?xf32>{%dim1}
// CHECK-NEXT: util.optimization_barrier %[[RET]]
util.optimization_barrier %0 : tensor<0x?xf32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @reshapeEmpty
// CHECK-SAME: (%[[DIM:.+]]: index)
-func.func @reshapeEmpty(%dim: index) -> tensor<?xi32> {
+util.func public @reshapeEmpty(%dim: index) -> tensor<?xi32> {
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<?xi32>{%[[DIM]]}
%0 = flow.tensor.empty : tensor<1x?xi32>{%dim}
// CHECK-NOT: flow.tensor.reshape
%1 = flow.tensor.reshape %0 : tensor<1x?xi32>{%dim} -> tensor<?xi32>{%dim}
- // CHECK: return %[[RET]]
- return %1 : tensor<?xi32>
+ // CHECK: util.return %[[RET]]
+ util.return %1 : tensor<?xi32>
}
// -----
// CHECK-LABEL: @loadConst
-func.func @loadConst() -> i32 {
+util.func public @loadConst() -> i32 {
%0 = arith.constant dense<[[0, 1], [2, 3]]> : tensor<2x2xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : i32
%2 = flow.tensor.load %0[%c1, %c0] : tensor<2x2xi32>
- // CHECK-NEXT: return %[[C2]]
- return %2 : i32
+ // CHECK-NEXT: util.return %[[C2]]
+ util.return %2 : i32
}
// -----
// CHECK-LABEL: @loadConstScalar
-func.func @loadConstScalar() -> i32 {
+util.func public @loadConstScalar() -> i32 {
%0 = arith.constant dense<4> : tensor<i32>
// CHECK-NEXT: %[[C4:.+]] = arith.constant 4 : i32
%1 = flow.tensor.load %0 : tensor<i32>
- // CHECK-NEXT: return %[[C4]]
- return %1 : i32
+ // CHECK-NEXT: util.return %[[C4]]
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @storeConst
-func.func @storeConst() -> tensor<2x2xi32> {
+util.func public @storeConst() -> tensor<2x2xi32> {
%0 = arith.constant dense<[[0, 1], [2, 3]]> : tensor<2x2xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -266,27 +266,27 @@
// CHECK-SAME: [0, 1], [4, 3]
// CHECK-SAME: ]> : tensor<2x2xi32>
%1 = flow.tensor.store %c4, %0[%c1, %c0] : tensor<2x2xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<2x2xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<2x2xi32>
}
// -----
// CHECK-LABEL: @storeConstScalar
-func.func @storeConstScalar() -> tensor<i32> {
+util.func public @storeConstScalar() -> tensor<i32> {
%0 = arith.constant dense<0> : tensor<i32>
%1 = arith.constant 4 : i32
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor<i32>
%2 = flow.tensor.store %1, %0 : tensor<i32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<i32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<i32>
}
// -----
// CHECK-LABEL: @allocaDims
// CHECK-SAME: (%[[DIM:.+]]: index)
-func.func @allocaDims(%dim: index) -> (index, index, index) {
+util.func public @allocaDims(%dim: index) -> (index, index, index) {
// CHECK-NOT: flow.tensor.alloca
%0 = flow.tensor.alloca : tensor<4x?x0xf32>{%dim}
%c0 = arith.constant 0 : index
@@ -295,15 +295,15 @@
%d0 = tensor.dim %0, %c0 : tensor<4x?x0xf32>
%d1 = tensor.dim %0, %c1 : tensor<4x?x0xf32>
%d2 = tensor.dim %0, %c2 : tensor<4x?x0xf32>
- // CHECK: return %c4, %[[DIM]], %c0
- return %d0, %d1, %d2 : index, index, index
+ // CHECK: util.return %c4, %[[DIM]], %c0
+ util.return %d0, %d1, %d2 : index, index, index
}
// -----
// CHECK-LABEL: @emptyDims
// CHECK-SAME: (%[[DIM:.+]]: index)
-func.func @emptyDims(%dim: index) -> (index, index, index) {
+util.func public @emptyDims(%dim: index) -> (index, index, index) {
// CHECK-NOT: flow.tensor.empty
%0 = flow.tensor.empty : tensor<4x?x0xf32>{%dim}
%c0 = arith.constant 0 : index
@@ -312,130 +312,130 @@
%d0 = tensor.dim %0, %c0 : tensor<4x?x0xf32>
%d1 = tensor.dim %0, %c1 : tensor<4x?x0xf32>
%d2 = tensor.dim %0, %c2 : tensor<4x?x0xf32>
- // CHECK: return %c4, %[[DIM]], %c0
- return %d0, %d1, %d2 : index, index, index
+ // CHECK: util.return %c4, %[[DIM]], %c0
+ util.return %d0, %d1, %d2 : index, index, index
}
// -----
// CHECK-LABEL: @splatDynamicShape
// CHECK-SAME: (%[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
-func.func @splatDynamicShape(%dim0: index, %dim1: index) -> tensor<?x?xi32> {
+util.func public @splatDynamicShape(%dim0: index, %dim1: index) -> tensor<?x?xi32> {
// CHECK: %[[FOUR:.+]] = arith.constant 4 : i32
%four = arith.constant 4 : i32
// CHECK: %[[SPLAT:.+]] = flow.tensor.splat %[[FOUR]] : tensor<?x?xi32>{%[[DIM0]], %[[DIM1]]}
%1 = flow.tensor.splat %four : tensor<?x?xi32>{%dim0, %dim1}
- // CHECK: return %[[SPLAT]]
- return %1 : tensor<?x?xi32>
+ // CHECK: util.return %[[SPLAT]]
+ util.return %1 : tensor<?x?xi32>
}
// -----
// CHECK-LABEL: @splatStaticZeroElements
-func.func @splatStaticZeroElements(%value: f32) -> tensor<0x2xf32> {
+util.func public @splatStaticZeroElements(%value: f32) -> tensor<0x2xf32> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<0x2xf32>
%0 = flow.tensor.splat %value : tensor<0x2xf32>
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<0x2xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<0x2xf32>
}
// -----
// CHECK-LABEL: @splatDynamicZeroElements
// CHECK-SAME: (%[[VALUE:.+]]: f32, %[[DIM:.+]]: index)
-func.func @splatDynamicZeroElements(%value: f32, %dim: index) -> tensor<0x?xf32> {
+util.func public @splatDynamicZeroElements(%value: f32, %dim: index) -> tensor<0x?xf32> {
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<0x?xf32>{%[[DIM]]}
%0 = flow.tensor.splat %value : tensor<0x?xf32>{%dim}
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<0x?xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<0x?xf32>
}
// -----
// CHECK-LABEL: @cloneConst
-func.func @cloneConst() -> tensor<4xi32> {
+util.func public @cloneConst() -> tensor<4xi32> {
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<[0, 1, 2, 3]> : tensor<4xi32>
%0 = arith.constant dense<[0, 1, 2, 3]> : tensor<4xi32>
%1 = flow.tensor.clone %0 : tensor<4xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<4xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<4xi32>
}
// -----
// CHECK-LABEL: @cloneConstZeroElements
-func.func @cloneConstZeroElements() -> tensor<0x2xi32> {
+util.func public @cloneConstZeroElements() -> tensor<0x2xi32> {
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<> : tensor<0x2xi32>
%0 = arith.constant dense<> : tensor<0x2xi32>
// CHECK-NOT: flow.tensor.clone
%1 = flow.tensor.clone %0 : tensor<0x2xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<0x2xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<0x2xi32>
}
// -----
// CHECK-LABEL: @cloneStaticZeroElements
-func.func @cloneStaticZeroElements(%arg0: tensor<0x2xf32>) -> tensor<0x2xf32> {
+util.func public @cloneStaticZeroElements(%arg0: tensor<0x2xf32>) -> tensor<0x2xf32> {
// CHECK-NOT: flow.tensor.clone
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<0x2xf32>
%0 = flow.tensor.clone %arg0 : tensor<0x2xf32>
// CHECK-NEXT: %[[RET]]
- return %0 : tensor<0x2xf32>
+ util.return %0 : tensor<0x2xf32>
}
// -----
// CHECK-LABEL: @cloneDynamicZeroElements
// CHECK-SAME: (%[[OPERAND:.+]]: tensor<0x?xf32>, %[[DIM:.+]]: index)
-func.func @cloneDynamicZeroElements(%arg0: tensor<0x?xf32>, %dim: index) -> tensor<0x?xf32> {
+util.func public @cloneDynamicZeroElements(%arg0: tensor<0x?xf32>, %dim: index) -> tensor<0x?xf32> {
// CHECK-NOT: flow.tensor.clone
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<0x?xf32>{%[[DIM]]}
%0 = flow.tensor.clone %arg0 : tensor<0x?xf32>{%dim}
// CHECK-NEXT: %[[RET]]
- return %0 : tensor<0x?xf32>
+ util.return %0 : tensor<0x?xf32>
}
// -----
// CHECK-LABEL: @sliceConst0D
-func.func @sliceConst0D() -> tensor<i32> {
+util.func public @sliceConst0D() -> tensor<i32> {
%0 = arith.constant dense<0> : tensor<i32>
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<0> : tensor<i32>
%1 = flow.tensor.slice %0[for] : tensor<i32> -> tensor<i32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<i32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<i32>
}
// -----
// CHECK-LABEL: @sliceConst1D
-func.func @sliceConst1D() -> tensor<1xi32> {
+util.func public @sliceConst1D() -> tensor<1xi32> {
%0 = arith.constant dense<0> : tensor<1xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<0> : tensor<1xi32>
%1 = flow.tensor.slice %0[%c0 for %c1] : tensor<1xi32> -> tensor<1xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<1xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<1xi32>
}
// -----
// CHECK-LABEL: @sliceConst1DZeroLength
-func.func @sliceConst1DZeroLength() -> tensor<0xi32> {
+util.func public @sliceConst1DZeroLength() -> tensor<0xi32> {
%0 = arith.constant dense<0> : tensor<1xi32>
%c0 = arith.constant 0 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<> : tensor<0xi32>
%1 = flow.tensor.slice %0[%c0 for %c0] : tensor<1xi32> -> tensor<0xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<0xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<0xi32>
}
// -----
// CHECK-LABEL: @sliceConst2D
-func.func @sliceConst2D() -> tensor<1x2xi32> {
+util.func public @sliceConst2D() -> tensor<1x2xi32> {
%0 = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : tensor<2x3xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -444,63 +444,63 @@
// CHECK-SAME: [1, 2]
// CHECK-SAME: ]> : tensor<1x2xi32>
%1 = flow.tensor.slice %0[%c0, %c1 for %c1, %c2] : tensor<2x3xi32> -> tensor<1x2xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<1x2xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<1x2xi32>
}
// -----
// CHECK-LABEL: @sliceConst2DZeroLength1
-func.func @sliceConst2DZeroLength1() -> tensor<1x0xi32> {
+util.func public @sliceConst2DZeroLength1() -> tensor<1x0xi32> {
%0 = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : tensor<2x3xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<> : tensor<1x0xi32>
%1 = flow.tensor.slice %0[%c0, %c0 for %c1, %c0] : tensor<2x3xi32> -> tensor<1x0xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<1x0xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<1x0xi32>
}
// -----
// CHECK-LABEL: @sliceConst2DZeroLength01
-func.func @sliceConst2DZeroLength01() -> tensor<0x0xi32> {
+util.func public @sliceConst2DZeroLength01() -> tensor<0x0xi32> {
%0 = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : tensor<2x3xi32>
%c0 = arith.constant 0 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<> : tensor<0x0xi32>
%1 = flow.tensor.slice %0[%c0, %c0 for %c0, %c0] : tensor<2x3xi32> -> tensor<0x0xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<0x0xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<0x0xi32>
}
// -----
// CHECK-LABEL: @sliceFromZeroElements
-func.func @sliceFromZeroElements(%arg0: tensor<0xi32>) -> tensor<?xi32> {
+util.func public @sliceFromZeroElements(%arg0: tensor<0xi32>) -> tensor<?xi32> {
%c0 = arith.constant 0 : index
// CHECK-NOT: flow.tensor.slice
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<?xi32>{%c0}
%0 = flow.tensor.slice %arg0[%c0 for %c0] : tensor<0xi32> -> tensor<?xi32>{%c0}
- // CHECK: return %[[RET]]
- return %0 : tensor<?xi32>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : tensor<?xi32>
}
// -----
// CHECK-LABEL: @sliceZeroElements
-func.func @sliceZeroElements(%arg0: tensor<?xi32>, %dim: index) -> tensor<0xi32> {
+util.func public @sliceZeroElements(%arg0: tensor<?xi32>, %dim: index) -> tensor<0xi32> {
%c0 = arith.constant 0 : index
// CHECK-NOT: flow.tensor.slice
// CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<0xi32>
%0 = flow.tensor.slice %arg0[%c0 for %c0] : tensor<?xi32>{%dim} -> tensor<0xi32>
- // CHECK: return %[[RET]]
- return %0 : tensor<0xi32>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : tensor<0xi32>
}
// -----
// CHECK-LABEL: @sliceConst3D
-func.func @sliceConst3D() -> tensor<1x2x3xi32> {
+util.func public @sliceConst3D() -> tensor<1x2x3xi32> {
%0 = arith.constant dense<[[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]> : tensor<2x3x3xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -510,52 +510,52 @@
// CHECK-SAME: [
// CHECK-SAME: [3, 4, 5], [6, 7, 8]]]> : tensor<1x2x3xi32>
%1 = flow.tensor.slice %0[%c0, %c1, %c0 for %c1, %c2, %c3] : tensor<2x3x3xi32> -> tensor<1x2x3xi32>
- // CHECK-NEXT: return %[[C]]
- return %1 : tensor<1x2x3xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %1 : tensor<1x2x3xi32>
}
// -----
// CHECK-LABEL: @updateConst0D
-func.func @updateConst0D() -> tensor<i32> {
+util.func public @updateConst0D() -> tensor<i32> {
%0 = arith.constant dense<0> : tensor<i32>
%1 = arith.constant dense<1> : tensor<i32>
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<0> : tensor<i32>
%2 = flow.tensor.update %0, %1[] : tensor<i32> -> tensor<i32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<i32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<i32>
}
// -----
// CHECK-LABEL: @updateConst1D
-func.func @updateConst1D() -> tensor<1xi32> {
+util.func public @updateConst1D() -> tensor<1xi32> {
%0 = arith.constant dense<0> : tensor<1xi32>
%1 = arith.constant dense<1> : tensor<1xi32>
%c0 = arith.constant 0 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<0> : tensor<1xi32>
%2 = flow.tensor.update %0, %1[%c0] : tensor<1xi32> -> tensor<1xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<1xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<1xi32>
}
// -----
// CHECK-LABEL: @updateConst1DUpdateZeroSize
-func.func @updateConst1DUpdateZeroSize() -> tensor<1xi32> {
+util.func public @updateConst1DUpdateZeroSize() -> tensor<1xi32> {
%0 = arith.constant dense<> : tensor<0xi32>
%1 = arith.constant dense<1> : tensor<1xi32>
%c0 = arith.constant 0 : index
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<1> : tensor<1xi32>
%2 = flow.tensor.update %0, %1[%c0] : tensor<0xi32> -> tensor<1xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<1xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<1xi32>
}
// -----
// CHECK-LABEL: @updateConst2DUpdate1x1
-func.func @updateConst2DUpdate1x1() -> tensor<3x4xi32> {
+util.func public @updateConst2DUpdate1x1() -> tensor<3x4xi32> {
%0 = arith.constant dense<[[12]]> : tensor<1x1xi32>
%1 = arith.constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]> : tensor<3x4xi32>
%c0 = arith.constant 0 : index
@@ -563,14 +563,14 @@
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<[
// CHECK-SAME: [0, 12, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]> : tensor<3x4xi32>
%2 = flow.tensor.update %0, %1[%c0, %c1] : tensor<1x1xi32> -> tensor<3x4xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<3x4xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<3x4xi32>
}
// -----
// CHECK-LABEL: @updateConst2DUpdate2x2
-func.func @updateConst2DUpdate2x2() -> tensor<3x4xi32> {
+util.func public @updateConst2DUpdate2x2() -> tensor<3x4xi32> {
%0 = arith.constant dense<[[12, 13], [14, 15]]> : tensor<2x2xi32>
%1 = arith.constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]> : tensor<3x4xi32>
%c0 = arith.constant 0 : index
@@ -578,14 +578,14 @@
// CHECK-NEXT: %[[C:.+]] = arith.constant dense<[
// CHECK-SAME: [0, 12, 13, 3], [4, 14, 15, 7], [8, 9, 10, 11]]> : tensor<3x4xi32>
%2 = flow.tensor.update %0, %1[%c0, %c1] : tensor<2x2xi32> -> tensor<3x4xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<3x4xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<3x4xi32>
}
// -----
// CHECK-LABEL: @updateConst3DUpdate1x2x3
-func.func @updateConst3DUpdate1x2x3() -> tensor<2x3x3xi32> {
+util.func public @updateConst3DUpdate1x2x3() -> tensor<2x3x3xi32> {
%0 = arith.constant dense<[[[18, 19, 20], [21, 22, 23]]]> : tensor<1x2x3xi32>
%1 = arith.constant dense<[[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]> : tensor<2x3x3xi32>
%c0 = arith.constant 0 : index
@@ -595,14 +595,14 @@
// CHECK-SAME: [0, 1, 2], [18, 19, 20], [21, 22, 23]], [
// CHECK-SAME: [9, 10, 11], [12, 13, 14], [15, 16, 17]]]> : tensor<2x3x3xi32>
%2 = flow.tensor.update %0, %1[%c0, %c1, %c0] : tensor<1x2x3xi32> -> tensor<2x3x3xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<2x3x3xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<2x3x3xi32>
}
// -----
// CHECK-LABEL: @updateConst3DUpdate2x3x2
-func.func @updateConst3DUpdate2x3x2() -> tensor<2x3x3xi32> {
+util.func public @updateConst3DUpdate2x3x2() -> tensor<2x3x3xi32> {
%0 = arith.constant dense<[[[18, 19], [20, 21], [22, 23]], [[24, 25], [26, 27], [28, 29]]]> : tensor<2x3x2xi32>
%1 = arith.constant dense<[[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]> : tensor<2x3x3xi32>
%c0 = arith.constant 0 : index
@@ -612,48 +612,48 @@
// CHECK-SAME: [18, 19, 2], [20, 21, 5], [22, 23, 8]], [
// CHECK-SAME: [24, 25, 11], [26, 27, 14], [28, 29, 17]]]> : tensor<2x3x3xi32>
%2 = flow.tensor.update %0, %1[%c0, %c1, %c0] : tensor<2x3x2xi32> -> tensor<2x3x3xi32>
- // CHECK-NEXT: return %[[C]]
- return %2 : tensor<2x3x3xi32>
+ // CHECK-NEXT: util.return %[[C]]
+ util.return %2 : tensor<2x3x3xi32>
}
// -----
// CHECK-LABEL: @updateReplace
-func.func @updateReplace(%arg0 : tensor<4xi32>, %arg1 : tensor<4xi32>) -> tensor<4xi32> {
+util.func public @updateReplace(%arg0 : tensor<4xi32>, %arg1 : tensor<4xi32>) -> tensor<4xi32> {
%c0 = arith.constant 0 : index
%0 = flow.tensor.update %arg0, %arg1[%c0] : tensor<4xi32> -> tensor<4xi32>
- // CHECK-NEXT: return %arg0
- return %0 : tensor<4xi32>
+ // CHECK-NEXT: util.return %arg0
+ util.return %0 : tensor<4xi32>
}
// -----
// CHECK-LABEL: @updateIntoZeroElements
-func.func @updateIntoZeroElements(%update: tensor<?x?xi32>, %dim: index, %target: tensor<0x0xi32>) -> tensor<0x0xi32> {
+util.func public @updateIntoZeroElements(%update: tensor<?x?xi32>, %dim: index, %target: tensor<0x0xi32>) -> tensor<0x0xi32> {
%c0 = arith.constant 0 : index
// CHECK-NOT: flow.tensor.update
// CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<0x0xi32>
%0 = flow.tensor.update %update, %target[%c0, %c0] : tensor<?x?xi32>{%dim, %dim} -> tensor<0x0xi32>
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<0x0xi32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<0x0xi32>
}
// -----
// CHECK-LABEL: @updateZeroElements
// CHECK-SAME: (%[[UPDATE:.+]]: tensor<0x1xi32>, %[[TARGET:.+]]: tensor<1x1xi32>)
-func.func @updateZeroElements(%update: tensor<0x1xi32>, %target: tensor<1x1xi32>) -> tensor<1x1xi32> {
+util.func public @updateZeroElements(%update: tensor<0x1xi32>, %target: tensor<1x1xi32>) -> tensor<1x1xi32> {
%c0 = arith.constant 0 : index
// CHECK-NOT: flow.tensor.update
%0 = flow.tensor.update %update, %target[%c0, %c0] : tensor<0x1xi32> -> tensor<1x1xi32>
- // CHECK: return %[[TARGET]]
- return %0 : tensor<1x1xi32>
+ // CHECK: util.return %[[TARGET]]
+ util.return %0 : tensor<1x1xi32>
}
// -----
// CHECK-LABEL: @propogateStaticShapeOfTarget
-func.func @propogateStaticShapeOfTarget(%arg0 : tensor<?x?xf32>, %arg1 : f32) -> tensor<?x?xf32> {
+util.func public @propogateStaticShapeOfTarget(%arg0 : tensor<?x?xf32>, %arg1 : f32) -> tensor<?x?xf32> {
%c21 = arith.constant 21 : index
%c42 = arith.constant 42 : index
%c2 = arith.constant 2 : index
@@ -667,14 +667,14 @@
// CHECK: %[[UPDATED:.+]] = flow.tensor.update %{{.+}}, %[[TARGET]]
// CHECK: %[[RESULT:.+]] = tensor.cast %[[UPDATED]] : tensor<21x42xf32> to tensor<?x?xf32>
%1 = flow.tensor.update %arg0, %0[%c2, %c4] : tensor<?x?xf32>{%c21, %c42} -> tensor<?x?xf32>{%c21, %c42}
- // CHECK: return %[[RESULT]]
- return %1 : tensor<?x?xf32>
+ // CHECK: util.return %[[RESULT]]
+ util.return %1 : tensor<?x?xf32>
}
// -----
// CHECK-LABEL: @propogateStaticShapeOfUpdate
-func.func @propogateStaticShapeOfUpdate(%arg0 : tensor<?x?xf32>, %arg1 : f32) -> tensor<?x?xf32> {
+util.func public @propogateStaticShapeOfUpdate(%arg0 : tensor<?x?xf32>, %arg1 : f32) -> tensor<?x?xf32> {
%c21 = arith.constant 21 : index
%c42 = arith.constant 42 : index
%c2 = arith.constant 2 : index
@@ -687,53 +687,53 @@
} : tensor<?x?xf32>
// CHECK: %[[RESULT:.+]] = flow.tensor.update %[[UPDATE]]
%1 = flow.tensor.update %0, %arg0[%c2, %c4] : tensor<?x?xf32>{%c21, %c42} -> tensor<?x?xf32>{%c21, %c42}
- // CHECK: return %[[RESULT]]
- return %1 : tensor<?x?xf32>
+ // CHECK: util.return %[[RESULT]]
+ util.return %1 : tensor<?x?xf32>
}
// -----
// CHECK-LABEL: @foldSplatLoadIntoPrimitive
// CHECK-SAME: (%[[arg0:.+]]: f32, %[[arg1:.+]]: index, %[[arg2:.+]]: index)
-func.func @foldSplatLoadIntoPrimitive(%arg0 : f32, %arg1 : index, %arg2 : index) -> f32 {
- // CHECK-NEXT: return %[[arg0]] : f32
+util.func public @foldSplatLoadIntoPrimitive(%arg0 : f32, %arg1 : index, %arg2 : index) -> f32 {
+ // CHECK-NEXT: util.return %[[arg0]] : f32
%0 = flow.tensor.splat %arg0 : tensor<4x4xf32>
%1 = flow.tensor.load %0[%arg1, %arg2] : tensor<4x4xf32>
- return %1 : f32
+ util.return %1 : f32
}
// -----
// CHECK-LABEL: @foldSplatReshapeIntoSplat
-func.func @foldSplatReshapeIntoSplat(%arg0 : f32) -> tensor<16xf32> {
+util.func public @foldSplatReshapeIntoSplat(%arg0 : f32) -> tensor<16xf32> {
// CHECK-NEXT: %0 = flow.tensor.splat %arg0 : tensor<16xf32>
- // CHECK-NEXT: return %0 : tensor<16xf32>
+ // CHECK-NEXT: util.return %0 : tensor<16xf32>
%0 = flow.tensor.splat %arg0 : tensor<4x4xf32>
%1 = flow.tensor.reshape %0 : tensor<4x4xf32> -> tensor<16xf32>
- return %1 : tensor<16xf32>
+ util.return %1 : tensor<16xf32>
}
// -----
// CHECK-LABEL: @foldSplatReshapeIntoSplatDynamic
-func.func @foldSplatReshapeIntoSplatDynamic(%arg0 : f32, %arg1 : index, %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
+util.func public @foldSplatReshapeIntoSplatDynamic(%arg0 : f32, %arg1 : index, %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
// CHECK-NEXT: %0 = flow.tensor.splat %arg0 : tensor<?x?xf32>{%arg2, %arg3}
- // CHECK-NEXT: return %0 : tensor<?x?xf32>
+ // CHECK-NEXT: util.return %0 : tensor<?x?xf32>
%0 = flow.tensor.splat %arg0 : tensor<?x4xf32>{%arg1}
%1 = flow.tensor.reshape %0 : tensor<?x4xf32>{%arg1} -> tensor<?x?xf32>{%arg2, %arg3}
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
// -----
-func.func @innermost_unit_dim(%4: !flow.dispatch.tensor<readonly:tensor<3x1x16x257x88xf16>>,
+util.func public @innermost_unit_dim(%4: !flow.dispatch.tensor<readonly:tensor<3x1x16x257x88xf16>>,
%arg0: index, %arg2 : index, %10 : index, %9 : index) -> tensor<?x?x?xf16> {
%c16 = arith.constant 16 : index
%c1 = arith.constant 1 : index
%11 = flow.dispatch.tensor.load %4, offsets = [1, 0, %arg0, %10, %arg2], sizes = [1, 1, %c16, %9, %c1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1x16x257x88xf16>> -> tensor<?x?x?xf16>
- return %11 : tensor<?x?x?xf16>
+ util.return %11 : tensor<?x?x?xf16>
}
-// CHECK-LABEL: func @innermost_unit_dim
+// CHECK-LABEL: util.func public @innermost_unit_dim
// CHECK-SAME: %[[DYNAMIC_DIM:[a-zA-Z0-9]+]]: index)
// CHECK: flow.dispatch.tensor.load
// CHECK-SAME: sizes = [1, 1, 16, %[[DYNAMIC_DIM]], 1]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_ops.mlir
index 091a923..b0a19ad 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_ops.mlir
@@ -1,200 +1,200 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @tensorReshape
-func.func @tensorReshape(%arg0 : tensor<4x4xf32>) -> tensor<16xf32> {
+util.func public @tensorReshape(%arg0 : tensor<4x4xf32>) -> tensor<16xf32> {
// CHECK-NEXT: %0 = flow.tensor.reshape %arg0 : tensor<4x4xf32> -> tensor<16xf32>
%0 = flow.tensor.reshape %arg0 : tensor<4x4xf32> -> tensor<16xf32>
- return %0 : tensor<16xf32>
+ util.return %0 : tensor<16xf32>
}
// CHECK-LABEL: @tensorReshapeScalar
-func.func @tensorReshapeScalar(%arg0 : tensor<f32>) -> tensor<f32> {
+util.func public @tensorReshapeScalar(%arg0 : tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %0 = flow.tensor.reshape %arg0 : tensor<f32> -> tensor<f32>
%0 = flow.tensor.reshape %arg0 : tensor<f32> -> tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// CHECK-LABEL: @tensorReshapeDynamic
-func.func @tensorReshapeDynamic(%arg0 : tensor<?x4xf32>) -> tensor<?x2xf32> {
+util.func public @tensorReshapeDynamic(%arg0 : tensor<?x4xf32>) -> tensor<?x2xf32> {
%c4 = arith.constant 4 : index
%c8 = arith.constant 8 : index
// CHECK: %0 = flow.tensor.reshape %arg0 : tensor<?x4xf32>{%c4} -> tensor<?x2xf32>{%c8}
%0 = flow.tensor.reshape %arg0 : tensor<?x4xf32>{%c4} -> tensor<?x2xf32>{%c8}
- return %0 : tensor<?x2xf32>
+ util.return %0 : tensor<?x2xf32>
}
// CHECK-LABEL: @tensorReshapeComplex
-func.func @tensorReshapeComplex(%arg0 : tensor<4x4xcomplex<f32>>) -> tensor<16xcomplex<f32>> {
+util.func public @tensorReshapeComplex(%arg0 : tensor<4x4xcomplex<f32>>) -> tensor<16xcomplex<f32>> {
// CHECK-NEXT: flow.tensor.reshape %arg0 : tensor<4x4xcomplex<f32>> -> tensor<16xcomplex<f32>>
%0 = flow.tensor.reshape %arg0 : tensor<4x4xcomplex<f32>> -> tensor<16xcomplex<f32>>
- return %0 : tensor<16xcomplex<f32>>
+ util.return %0 : tensor<16xcomplex<f32>>
}
// -----
// CHECK-LABEL: @tensorBitCast
-func.func @tensorBitCast(%arg0 : tensor<16xi32>) -> tensor<4x8xi16> {
+util.func public @tensorBitCast(%arg0 : tensor<16xi32>) -> tensor<4x8xi16> {
// CHECK-NEXT: %0 = flow.tensor.bitcast %arg0 : tensor<16xi32> -> tensor<4x8xi16>
%0 = flow.tensor.bitcast %arg0 : tensor<16xi32> -> tensor<4x8xi16>
- return %0 : tensor<4x8xi16>
+ util.return %0 : tensor<4x8xi16>
}
// -----
// CHECK-LABEL: @tensorLoad
-func.func @tensorLoad(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index) -> f32 {
+util.func public @tensorLoad(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index) -> f32 {
// CHECK-NEXT: %0 = flow.tensor.load %arg0[%arg1, %arg2] : tensor<4x4xf32>
%0 = flow.tensor.load %arg0[%arg1, %arg2] : tensor<4x4xf32>
- return %0 : f32
+ util.return %0 : f32
}
// CHECK-LABEL: @tensorLoadScalar
-func.func @tensorLoadScalar(%arg0 : tensor<f32>) -> f32 {
+util.func public @tensorLoadScalar(%arg0 : tensor<f32>) -> f32 {
// CHECK-NEXT: %0 = flow.tensor.load %arg0 : tensor<f32>
%0 = flow.tensor.load %arg0 : tensor<f32>
- return %0 : f32
+ util.return %0 : f32
}
// CHECK-LABEL: @tensorLoadDynamic
-func.func @tensorLoadDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index) -> f32 {
+util.func public @tensorLoadDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index) -> f32 {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.tensor.load %arg0[%arg1, %arg2] : tensor<?x4xf32>{%c4}
%0 = flow.tensor.load %arg0[%arg1, %arg2] : tensor<?x4xf32>{%c4}
- return %0 : f32
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @tensorStore
-func.func @tensorStore(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index, %arg3 : f32) -> tensor<4x4xf32> {
+util.func public @tensorStore(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index, %arg3 : f32) -> tensor<4x4xf32> {
// CHECK-NEXT: %0 = flow.tensor.store %arg3, %arg0[%arg1, %arg2] : tensor<4x4xf32>
%0 = flow.tensor.store %arg3, %arg0[%arg1, %arg2] : tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
+ util.return %0 : tensor<4x4xf32>
}
// CHECK-LABEL: @tensorStoreScalar
-func.func @tensorStoreScalar(%arg0 : f32, %arg1 : tensor<f32>) -> tensor<f32> {
+util.func public @tensorStoreScalar(%arg0 : f32, %arg1 : tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %0 = flow.tensor.store %arg0, %arg1 : tensor<f32>
%0 = flow.tensor.store %arg0, %arg1 : tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// CHECK-LABEL: @tensorStoreDynamic
-func.func @tensorStoreDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index, %arg3 : f32) -> tensor<?x4xf32> {
+util.func public @tensorStoreDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index, %arg3 : f32) -> tensor<?x4xf32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.tensor.store %arg3, %arg0[%arg1, %arg2] : tensor<?x4xf32>{%c4}
%0 = flow.tensor.store %arg3, %arg0[%arg1, %arg2] : tensor<?x4xf32>{%c4}
- return %0 : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @tensorAlloca
-func.func @tensorAlloca(%arg0: index) -> tensor<?x0x1xf32> {
+util.func public @tensorAlloca(%arg0: index) -> tensor<?x0x1xf32> {
// CHECK-NEXT: = flow.tensor.alloca : tensor<?x0x1xf32>{%arg0}
%0 = flow.tensor.alloca : tensor<?x0x1xf32>{%arg0}
- return %0 : tensor<?x0x1xf32>
+ util.return %0 : tensor<?x0x1xf32>
}
// -----
// CHECK-LABEL: @tensorEmpty
-func.func @tensorEmpty(%arg0: index) -> tensor<?x0x1xf32> {
+util.func public @tensorEmpty(%arg0: index) -> tensor<?x0x1xf32> {
// CHECK-NEXT: = flow.tensor.empty : tensor<?x0x1xf32>{%arg0}
%0 = flow.tensor.empty : tensor<?x0x1xf32>{%arg0}
- return %0 : tensor<?x0x1xf32>
+ util.return %0 : tensor<?x0x1xf32>
}
// -----
// CHECK-LABEL: @tensorSplat
-func.func @tensorSplat(%arg0 : f32) -> tensor<4x4xf32> {
+util.func public @tensorSplat(%arg0 : f32) -> tensor<4x4xf32> {
// CHECK-NEXT: %0 = flow.tensor.splat %arg0 : tensor<4x4xf32>
%0 = flow.tensor.splat %arg0 : tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
+ util.return %0 : tensor<4x4xf32>
}
// CHECK-LABEL: @tensorSplatScalar
-func.func @tensorSplatScalar(%arg0 : f32) -> tensor<f32> {
+util.func public @tensorSplatScalar(%arg0 : f32) -> tensor<f32> {
// CHECK-NEXT: %0 = flow.tensor.splat %arg0 : tensor<f32>
%0 = flow.tensor.splat %arg0 : tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// CHECK-LABEL: @tensorSplatDynamic
-func.func @tensorSplatDynamic(%arg0 : f32) -> tensor<?x4xf32> {
+util.func public @tensorSplatDynamic(%arg0 : f32) -> tensor<?x4xf32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.tensor.splat %arg0 : tensor<?x4xf32>{%c4}
%0 = flow.tensor.splat %arg0 : tensor<?x4xf32>{%c4}
- return %0 : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @tensorClone
-func.func @tensorClone(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
+util.func public @tensorClone(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
// CHECK-NEXT: %0 = flow.tensor.clone %arg0 : tensor<4x4xf32>
%0 = flow.tensor.clone %arg0 : tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
+ util.return %0 : tensor<4x4xf32>
}
// CHECK-LABEL: @tensorCloneScalar
-func.func @tensorCloneScalar(%arg0 : tensor<f32>) -> tensor<f32> {
+util.func public @tensorCloneScalar(%arg0 : tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %0 = flow.tensor.clone %arg0 : tensor<f32>
%0 = flow.tensor.clone %arg0 : tensor<f32>
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
// CHECK-LABEL: @tensorCloneDynamic
-func.func @tensorCloneDynamic(%arg0 : tensor<?x4xf32>) -> tensor<?x4xf32> {
+util.func public @tensorCloneDynamic(%arg0 : tensor<?x4xf32>) -> tensor<?x4xf32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.tensor.clone %arg0 : tensor<?x4xf32>{%c4}
%0 = flow.tensor.clone %arg0 : tensor<?x4xf32>{%c4}
- return %0 : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @tensorSlice
-func.func @tensorSlice(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index) -> tensor<2x2xf32> {
+util.func public @tensorSlice(%arg0 : tensor<4x4xf32>, %arg1 : index, %arg2 : index) -> tensor<2x2xf32> {
// CHECK-NEXT: %0 = flow.tensor.slice %arg0[%arg1, %arg2 for %arg2, %arg1] : tensor<4x4xf32> -> tensor<2x2xf32>
%0 = flow.tensor.slice %arg0[%arg1, %arg2 for %arg2, %arg1] : tensor<4x4xf32> -> tensor<2x2xf32>
- return %0 : tensor<2x2xf32>
+ util.return %0 : tensor<2x2xf32>
}
// CHECK-LABEL: @tensorSliceDynamic
-func.func @tensorSliceDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index) -> tensor<?x2xf32> {
+util.func public @tensorSliceDynamic(%arg0 : tensor<?x4xf32>, %arg1 : index, %arg2 : index) -> tensor<?x2xf32> {
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.tensor.slice %arg0[%arg1, %arg2 for %arg2, %arg1] : tensor<?x4xf32>{%c4} -> tensor<?x2xf32>{%c2}
%0 = flow.tensor.slice %arg0[%arg1, %arg2 for %arg2, %arg1] : tensor<?x4xf32>{%c4} -> tensor<?x2xf32>{%c2}
- return %0 : tensor<?x2xf32>
+ util.return %0 : tensor<?x2xf32>
}
// -----
// CHECK-LABEL: @tensorUpdate
-func.func @tensorUpdate(%arg0 : tensor<2x2xf32>, %arg1 : tensor<4x4xf32>, %arg2 : index, %arg3 : index) -> tensor<4x4xf32> {
+util.func public @tensorUpdate(%arg0 : tensor<2x2xf32>, %arg1 : tensor<4x4xf32>, %arg2 : index, %arg3 : index) -> tensor<4x4xf32> {
// CHECK-NEXT: %0 = flow.tensor.update %arg0, %arg1[%arg2, %arg3] : tensor<2x2xf32> -> %arg1 as tensor<4x4xf32>
%0 = flow.tensor.update %arg0, %arg1[%arg2, %arg3] : tensor<2x2xf32> -> %arg1 as tensor<4x4xf32>
- return %0 : tensor<4x4xf32>
+ util.return %0 : tensor<4x4xf32>
}
// CHECK-LABEL: @tensorUpdateDynamic
-func.func @tensorUpdateDynamic(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x4xf32>, %arg2 : index, %arg3 : index) -> tensor<?x4xf32> {
+util.func public @tensorUpdateDynamic(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x4xf32>, %arg2 : index, %arg3 : index) -> tensor<?x4xf32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
// CHECK: %0 = flow.tensor.update %arg0, %arg1[%arg2, %arg3] : tensor<?x?xf32>{%c1, %c2} -> %arg1 as tensor<?x4xf32>{%c3}
%0 = flow.tensor.update %arg0, %arg1[%arg2, %arg3] : tensor<?x?xf32>{%c1, %c2} -> %arg1 as tensor<?x4xf32>{%c3}
- return %0 : tensor<?x4xf32>
+ util.return %0 : tensor<?x4xf32>
}
// -----
// CHECK-LABEL: @tensorTrace
// CHECK-SAME: (%[[TENSOR0:.+]]: tensor<5xf32>, %[[TENSOR1:.+]]: tensor<?x3x?xi32>, %[[TENSOR1_DIM0:.+]]: index, %[[TENSOR1_DIM2:.+]]: index)
-func.func @tensorTrace(%tensor0: tensor<5xf32>, %tensor1: tensor<?x3x?xi32>, %tensor1_dim0: index, %tensor1_dim2: index) {
+util.func public @tensorTrace(%tensor0: tensor<5xf32>, %tensor1: tensor<?x3x?xi32>, %tensor1_dim0: index, %tensor1_dim2: index) {
// CHECK: flow.tensor.trace "FOOBAR" = [
// CHECK-SAME: %[[TENSOR0]] : tensor<5xf32>,
// CHECK-SAME: %[[TENSOR1]] : tensor<?x3x?xi32>{%[[TENSOR1_DIM0]], %[[TENSOR1_DIM2]]}
@@ -203,5 +203,5 @@
%tensor0 : tensor<5xf32>,
%tensor1 : tensor<?x3x?xi32>{%tensor1_dim0, %tensor1_dim2}
]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/types.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/types.mlir
index 9adead0..5de7246 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/types.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/types.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @dispatchTypes
-func.func @dispatchTypes(
+util.func public @dispatchTypes(
// CHECK-SAME: %arg0: !flow.dispatch.tensor<readonly:tensor<f32>>
%arg0: !flow.dispatch.tensor<readonly:tensor<f32>>,
// CHECK-SAME: %arg1: !flow.dispatch.tensor<readonly:tensor<4x4xf32>>
@@ -23,5 +23,5 @@
// CHECK-SAME: %arg9: !flow.dispatch.tensor<writeonly:tensor<1x?x3xf32>>
%arg9: !flow.dispatch.tensor<writeonly:tensor<1x?x3xf32>>
) {
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
index 07db885..47ab089 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
@@ -15,7 +15,6 @@
#include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
@@ -194,7 +193,7 @@
static LogicalResult
createEntryPointBenchmarkFunc(mlir::ModuleOp moduleOp,
- mlir::func::FuncOp entryFuncOp,
+ IREE::Util::FuncOp entryFuncOp,
Explorer &explorer) {
auto symbolTable = explorer.getSymbolTables().getSymbolTable(moduleOp);
OpBuilder moduleBuilder(moduleOp.getContext());
@@ -216,7 +215,7 @@
// Create a `() -> ()` entry point op the benchmark tool can run.
Location loc = entryFuncOp.getLoc();
- auto funcOp = moduleBuilder.create<mlir::func::FuncOp>(
+ auto funcOp = moduleBuilder.create<IREE::Util::FuncOp>(
loc, funcName, moduleBuilder.getFunctionType({}, {}));
funcOp.setPublic();
funcOp->setAttr("iree.abi.stub", moduleBuilder.getUnitAttr());
@@ -236,14 +235,14 @@
.createLoadOp(loc, blockBuilder)
.getLoadedGlobalValue());
}
- auto callOp = blockBuilder.create<mlir::func::CallOp>(loc, entryFuncOp, args);
+ auto callOp = blockBuilder.create<IREE::Util::CallOp>(loc, entryFuncOp, args);
// Sink all results with a barrier to ensure that DCE does not remove the
// call.
for (auto result : callOp.getResults()) {
blockBuilder.create<IREE::Util::OptimizationBarrierOp>(loc, result);
}
- blockBuilder.create<mlir::func::ReturnOp>(loc);
+ blockBuilder.create<IREE::Util::ReturnOp>(loc);
// Ensure the original function is not exported and not inlined.
entryFuncOp->setAttr("noinline", moduleBuilder.getUnitAttr());
@@ -274,8 +273,8 @@
// Gather the functions we want to wrap for benchmarking and wrap them.
// Since we are inserting new functions as part of this pass we must perform
// the wrapping for only the inputs.
- SmallVector<mlir::func::FuncOp> entryFuncOps;
- for (auto entryFuncOp : moduleOp.getOps<mlir::func::FuncOp>()) {
+ SmallVector<IREE::Util::FuncOp> entryFuncOps;
+ for (auto entryFuncOp : moduleOp.getOps<IREE::Util::FuncOp>()) {
if (entryFuncOp.isPublic()) {
entryFuncOps.push_back(entryFuncOp);
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
index bd417ee..cca18e6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InsertDispatchDebugTargets.cpp
@@ -11,9 +11,10 @@
#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
+#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Regex.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Diagnostics.h"
@@ -72,7 +73,7 @@
// after the op. Updates the function signature to match the return type of the
// target operation.
static LogicalResult replaceReturnWithOpResults(mlir::ModuleOp moduleOp,
- mlir::func::FuncOp funcOp,
+ IREE::Util::FuncOp funcOp,
Operation *op) {
if (!funcOp->isProperAncestor(op))
return failure();
@@ -110,7 +111,7 @@
// Create the new return and update the function type.
IRRewriter rewriter(builder);
- rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(oldTerminator, exports);
+ rewriter.replaceOpWithNewOp<IREE::Util::ReturnOp>(oldTerminator, exports);
SmallVector<Type> argTypes;
for (const auto &arg : llvm::enumerate(funcOp.getArguments()))
@@ -118,6 +119,7 @@
funcOp.setType(FunctionType::get(context,
/*inputs=*/argTypes, /*results=*/newTypes));
+ funcOp.removeTiedOperandsAttr();
return success();
}
@@ -147,7 +149,7 @@
Operation *operation = op;
// Only look for dispatches in upstream func ops.
- auto funcOp = llvm::dyn_cast<mlir::func::FuncOp>(operation);
+ auto funcOp = llvm::dyn_cast<IREE::Util::FuncOp>(operation);
if (!funcOp)
continue;
@@ -190,7 +192,8 @@
struct InsertDebugTargetAtSymbolPass
: public InsertDebugTargetAtSymbolBase<InsertDebugTargetAtSymbolPass> {
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<IREE::Flow::FlowDialect, IREE::HAL::HALDialect>();
+ registry.insert<IREE::Flow::FlowDialect, IREE::HAL::HALDialect,
+ IREE::Util::UtilDialect>();
}
InsertDebugTargetAtSymbolPass(std::string breakStr, std::string traceStr) {
this->breakDebugTarget = breakStr;
@@ -231,7 +234,7 @@
// dispatch is not found within the entry block of the function.
if (breakTarget) {
Operation *operation = funcOp;
- auto mlirFuncOp = dyn_cast<mlir::func::FuncOp>(operation);
+ auto mlirFuncOp = dyn_cast<IREE::Util::FuncOp>(operation);
if (!mlirFuncOp || failed(replaceReturnWithOpResults(
getOperation(), mlirFuncOp, breakTarget)))
return signalPassFailure();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
index 3ca48ee..fff566d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchRegions.cpp
@@ -158,7 +158,7 @@
public:
OutlineDispatchRegionsPass() = default;
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<IREE::Flow::FlowDialect>();
+ registry.insert<func::FuncDialect, IREE::Flow::FlowDialect>();
}
void runOnOperation() override {
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
index f5b314d..93c510f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
@@ -37,7 +37,7 @@
"fusion_of_tensor_ops.mlir",
"initialize_empty_tensors.mlir",
"inject_dispatch_tracing.mlir",
- "insert_dispatch_debug_markers.mlir",
+ "insert_dispatch_debug_targets.mlir",
"interchange_generic_ops.mlir",
"interchange_transpose_generic_ops.mlir",
"outline_dispatch_externs.mlir",
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
index 6af5a6e..9f76000 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
@@ -35,7 +35,7 @@
"fusion_of_tensor_ops.mlir"
"initialize_empty_tensors.mlir"
"inject_dispatch_tracing.mlir"
- "insert_dispatch_debug_markers.mlir"
+ "insert_dispatch_debug_targets.mlir"
"interchange_generic_ops.mlir"
"interchange_transpose_generic_ops.mlir"
"outline_dispatch_externs.mlir"
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir
index 92de50e..7ba9196 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir
@@ -31,14 +31,14 @@
}
}
}
-func.func @main() -> (tensor<4x8xf32>, tensor<8x4xf32>) {
+util.func public @main() -> (tensor<4x8xf32>, tensor<8x4xf32>) {
%c100 = arith.constant 100 : index
%c50 = arith.constant 50 : index
// CHECK: flow.dispatch @ex0::@dispatch0_fill_4x8_f32
%0 = flow.dispatch @ex0::@dispatch0[%c100, %c50]() : () -> tensor<4x8xf32>
// CHECK: flow.dispatch @ex1::@dispatch1_fill_8x4_f32
%1 = flow.dispatch @ex1::@dispatch1[%c100, %c50]() : () -> tensor<8x4xf32>
- return %0, %1 : tensor<4x8xf32>, tensor<8x4xf32>
+ util.return %0, %1 : tensor<4x8xf32>, tensor<8x4xf32>
}
// -----
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/capture_dispatch_dynamic_dims.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/capture_dispatch_dynamic_dims.mlir
index 4a8882f..2230b87 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/capture_dispatch_dynamic_dims.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/capture_dispatch_dynamic_dims.mlir
@@ -5,7 +5,7 @@
// CHECK-LABEL: @captureDims
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG0_DIM0:.+]]: index, %[[ARG0_DIM1:.+]]: index, %[[RET0_DIM0:.+]]: index, %[[RET0_DIM1:.+]]: index)
-func.func @captureDims(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index, %ret0_dim0: index, %ret0_dim1: index) {
+util.func public @captureDims(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index, %ret0_dim0: index, %ret0_dim1: index) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1, %c1, %c1](%[[ARG0]], %[[ARG0_DIM0]], %[[RET0_DIM0]], %[[ARG0_DIM1]], %[[RET0_DIM1]])
%0 = flow.dispatch.workgroups[%c1, %c1, %c1](%arg0, %arg0_dim0, %ret0_dim0) : (tensor<?x?xf32>{%arg0_dim0, %arg0_dim1}, index, index) -> tensor<?x?xf32>{%ret0_dim0, %ret0_dim1} =
@@ -15,7 +15,7 @@
// CHECK-DAG: = flow.dispatch.tie_shape %[[RET0_CAPTURE]] : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%[[RET0_DIM0_CAPTURE]], %[[RET0_DIM1_CAPTURE]]}
flow.return
}
- return
+ util.return
}
// -----
@@ -25,7 +25,7 @@
// CHECK-LABEL: @capture2DimsForOneTensor
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG0_DIM0:.+]]: index, %[[ARG0_DIM1:.+]]: index, %[[RET0_DIM0:.+]]: index, %[[RET0_DIM1:.+]]: index)
-func.func @capture2DimsForOneTensor(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index, %ret0_dim0: index, %ret0_dim1: index) {
+util.func public @capture2DimsForOneTensor(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index, %ret0_dim0: index, %ret0_dim1: index) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1, %c1, %c1](%[[ARG0]], %[[ARG0_DIM0]], %[[ARG0_DIM1]], %[[RET0_DIM0]], %[[RET0_DIM1]])
%0 = flow.dispatch.workgroups[%c1, %c1, %c1](%arg0) : (tensor<?x?xf32>{%arg0_dim0, %arg0_dim1}) -> tensor<?x?xf32>{%ret0_dim0, %ret0_dim1} =
@@ -35,7 +35,7 @@
// CHECK-DAG: = flow.dispatch.tie_shape %[[RET0_CAPTURE]] : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%[[RET0_DIM0_CAPTURE]], %[[RET0_DIM1_CAPTURE]]}
flow.return
}
- return
+ util.return
}
// -----
@@ -44,7 +44,7 @@
// CHECK-LABEL: @capturedTiedDims
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG0_DIM0:.+]]: index, %[[ARG0_DIM1:.+]]: index)
-func.func @capturedTiedDims(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index) {
+util.func public @capturedTiedDims(%arg0: tensor<?x?xf32>, %arg0_dim0: index, %arg0_dim1: index) {
%c1 = arith.constant 1 : index
// CHECK: flow.dispatch.workgroups[%c1, %c1, %c1](%[[ARG0]], %[[ARG0_DIM0]], %[[ARG0_DIM1]])
%0 = flow.dispatch.workgroups[%c1, %c1, %c1](%arg0, %arg0_dim0) : (tensor<?x?xf32>{%arg0_dim0, %arg0_dim1}, index) -> %arg0{%arg0_dim0, %arg0_dim1} =
@@ -53,5 +53,5 @@
// CHECK-DAG: = flow.dispatch.tie_shape %[[ARG0_CAPTURE]] : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%[[ARG0_DIM0_CAPTURE]], %[[ARG0_DIM1_CAPTURE]]}
flow.return
}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
index 82ade23..d7963e6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
@@ -4,12 +4,12 @@
// CHECK-LABEL: @stripTieShape
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?xi32>, %[[ARG1:.+]]: index)
-func.func @stripTieShape(%arg0: tensor<?xi32>, %arg1: index) {
+util.func public @stripTieShape(%arg0: tensor<?xi32>, %arg1: index) {
// CHECK-NOT: flow.tensor.tie_shape
%0 = flow.tensor.tie_shape %arg0 : tensor<?xi32>{%arg1}
// CHECK: util.optimization_barrier %[[ARG0]]
%1 = util.optimization_barrier %0 : tensor<?xi32>
- return
+ util.return
}
// -----
@@ -19,10 +19,10 @@
// pipeline and if they haven't been by now there's nothing else to lower them
// into.
-func.func @invalidTensorDim(%arg0: tensor<?xi32>) {
+util.func public @invalidTensorDim(%arg0: tensor<?xi32>) {
%c0 = arith.constant 0 : index
// expected-error @+1 {{'tensor.dim' op unexpected during shape cleanup}}
%0 = tensor.dim %arg0, %c0 : tensor<?xi32>
%1 = util.optimization_barrier %0 : index
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/clone_producers_into_dispatch_regions.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/clone_producers_into_dispatch_regions.mlir
index 3393b07..136af8b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/clone_producers_into_dispatch_regions.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/clone_producers_into_dispatch_regions.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-flow-clone-producers-into-dispatch-regions))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-flow-clone-producers-into-dispatch-regions))" %s | FileCheck %s
-func.func @complex_element_type(%input: tensor<4xi32>, %table: tensor<8x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
+util.func public @complex_element_type(%input: tensor<4xi32>, %table: tensor<8x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
%c4095 = arith.constant 4095 : i32
%const = arith.constant dense<[
[(0x7FC00000,0.000000e+00), (0x7FC00000,1.000000e+00)], [(0x7FC00000,2.000000e+00), (0x7FC00000,3.000000e+00)],
@@ -22,10 +22,10 @@
} -> tensor<4x2xcomplex<f32>>
flow.return %generic : tensor<4x2xcomplex<f32>>
}
- return %0 : tensor<4x2xcomplex<f32>>
+ util.return %0 : tensor<4x2xcomplex<f32>>
}
-// CHECK-LABEL: func.func @complex_element_type
+// CHECK-LABEL: util.func public @complex_element_type
// CHECK: flow.dispatch.region
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x2xcomplex<f32>>
// CHECK: %[[CST:.+]] = arith.constant dense<{{.+}}> : tensor<4x2xcomplex<f32>>
@@ -36,7 +36,7 @@
// -----
-func.func @complex_constant_clone(%input: tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
+util.func public @complex_constant_clone(%input: tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
%cst = complex.constant [1.000000e+00 : f32, 2.000000e+00 : f32] : complex<f32>
%empty = tensor.empty() : tensor<4x2xcomplex<f32>>
%0 = linalg.fill ins(%cst : complex<f32>) outs(%empty : tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>>
@@ -51,7 +51,7 @@
} -> tensor<4x2xcomplex<f32>>
flow.return %generic : tensor<4x2xcomplex<f32>>
}
- return %1 : tensor<4x2xcomplex<f32>>
+ util.return %1 : tensor<4x2xcomplex<f32>>
}
// CHECK-LABEL: @complex_constant_clone
@@ -66,7 +66,7 @@
// -----
-func.func @complex_create(%real : f32, %imag : f32, %input: tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
+util.func public @complex_create(%real : f32, %imag : f32, %input: tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>> {
%cst = complex.create %real, %imag : complex<f32>
%empty = tensor.empty() : tensor<4x2xcomplex<f32>>
%0 = linalg.fill ins(%cst : complex<f32>) outs(%empty : tensor<4x2xcomplex<f32>>) -> tensor<4x2xcomplex<f32>>
@@ -81,7 +81,7 @@
} -> tensor<4x2xcomplex<f32>>
flow.return %generic : tensor<4x2xcomplex<f32>>
}
- return %0 : tensor<4x2xcomplex<f32>>
+ util.return %0 : tensor<4x2xcomplex<f32>>
}
// CHECK-LABEL: @complex_create
@@ -96,7 +96,7 @@
// -----
-func.func @use_in_dispatch_count(%arg0: tensor<1xi32>, %arg1: tensor<1xi32>) -> tensor<i32> {
+util.func public @use_in_dispatch_count(%arg0: tensor<1xi32>, %arg1: tensor<1xi32>) -> tensor<i32> {
%c1 = arith.constant 1 : index
%c2_i32 = arith.constant 2 : i32
%c0 = arith.constant 0 : index
@@ -112,7 +112,7 @@
} count() -> (index, index, index) {
flow.return %c1, %c1, %c1 : index, index, index
}
- return %4 : tensor<i32>
+ util.return %4 : tensor<i32>
}
@@ -126,7 +126,7 @@
// -----
-func.func @clone_dequantization(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
+util.func public @clone_dequantization(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x1x4096xf32>
%1 = tensor.empty() : tensor<4096x32x128xf32>
@@ -159,9 +159,9 @@
} -> tensor<1x1x4096xf32>
flow.return %4 : tensor<1x1x4096xf32>
}
- return %9 : tensor<1x1x4096xf32>
+ util.return %9 : tensor<1x1x4096xf32>
}
-// CHECK: func.func @clone_dequantization
+// CHECK: util.func public @clone_dequantization
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<4096x32x128xi8>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1x1x32x128xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<4096x32x1xf32>
@@ -181,13 +181,13 @@
// CHECK-SAME: ins(%[[ARG1]], %[[GEN0]] :
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.return %[[GEN1]] :
-// CHECK: return %[[DISP]]
+// CHECK: util.return %[[DISP]]
// -----
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
module {
- func.func @clone_dequantization_like(%arg0: tensor<32x1x16x1x8xi16>, %arg1: tensor<32x344x16x32x8xi4>) -> tensor<32x1x344x1x32xi32> {
+ util.func public @clone_dequantization_like(%arg0: tensor<32x1x16x1x8xi16>, %arg1: tensor<32x344x16x32x8xi4>) -> tensor<32x1x344x1x32xi32> {
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<32x1x16x1x8xi32>
%1 = linalg.generic {indexing_maps = [#map, #map],
@@ -211,10 +211,10 @@
%7 = linalg.batch_mmt4d ins(%1, %3 : tensor<32x1x16x1x8xi32>, tensor<32x344x16x32x8xi32>) outs(%5 : tensor<32x1x344x1x32xi32>) -> tensor<32x1x344x1x32xi32>
flow.return %7 : tensor<32x1x344x1x32xi32>
}
- return %6 : tensor<32x1x344x1x32xi32>
+ util.return %6 : tensor<32x1x344x1x32xi32>
}
}
-// CHECK: func.func @clone_dequantization
+// CHECK: util.func public @clone_dequantization
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<32x1x16x1x8xi16>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x344x16x32x8xi4>
// CHECK: %[[DISP:.+]] = flow.dispatch.region -> (tensor<32x1x344x1x32xi32>)
@@ -236,4 +236,4 @@
// CHECK-SAME: ins(%[[GEN0]], %[[GEN1]] :
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.return %[[MMT4D]] :
-// CHECK: return %[[DISP]]
+// CHECK: util.return %[[DISP]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_dimensions.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_dimensions.mlir
index 3417108..01a289d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_dimensions.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_dimensions.mlir
@@ -1,7 +1,7 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-flow-collapse-dimensions))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-flow-collapse-dimensions))" %s | FileCheck %s
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @do_not_collapse_cst_in_place(%arg0: tensor<1x1x2304xf32>) {
+util.func public @do_not_collapse_cst_in_place(%arg0: tensor<1x1x2304xf32>) {
%cst = arith.constant dense<0.000000e+00> : tensor<1x1x2304xf32>
%0 = tensor.empty() : tensor<1x1x2304xf32>
%1 = flow.dispatch.region -> (tensor<1x1x2304xf32>) {
@@ -13,9 +13,9 @@
} -> tensor<1x1x2304xf32>
flow.return %3 : tensor<1x1x2304xf32>
}
- return
+ util.return
}
-// CHECK-LABEL: func.func @do_not_collapse_cst_in_place
+// CHECK-LABEL: util.func public @do_not_collapse_cst_in_place
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]]]
// CHECK-DAG: %[[CST:.+]] = arith.constant
// CHECK-DAG: %[[COLLAPSED_ARG0:.+]] = tensor.collapse_shape %[[ARG0]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
index 4ca4637..531161d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_linalg_generic_on_tensors.mlir
@@ -1,8 +1,8 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-clone-producers-into-dispatch-regions, iree-flow-collapse-dimensions, cse))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-clone-producers-into-dispatch-regions, iree-flow-collapse-dimensions, cse))" %s | FileCheck %s
!type = tensor<2x4x8x16x32x64xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse1() -> !type {
+util.func public @collapse1() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -19,12 +19,12 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type
- return %6: !type
+ util.return %6: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0)>
-// CHECK-LABEL: func.func @collapse1
+// CHECK-LABEL: util.func public @collapse1
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1, 2, 3, 4, 5]] : tensor<2x4x8x16x32x64xf32> into tensor<2097152xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<2097152xf32>
@@ -37,7 +37,7 @@
!type = tensor<2x4x8x32x32x64x128xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse2() -> !type {
+util.func public @collapse2() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -54,13 +54,13 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type
- return %6: !type
+ util.return %6: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d2, d4)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
-// CHECK-LABEL: func.func @collapse2
+// CHECK-LABEL: util.func public @collapse2
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1], [2], [3], [4], [5, 6]] : tensor<2x4x8x32x32x64x128xf32> into tensor<8x8x32x32x8192xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<8x8x32x32x8192xf32>
@@ -72,7 +72,7 @@
!type = tensor<2x4x8x16x32x64x128x256xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse3() -> !type {
+util.func public @collapse3() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -89,12 +89,12 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type
- return %result: !type
+ util.return %result: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-// CHECK-LABEL: func.func @collapse3
+// CHECK-LABEL: util.func public @collapse3
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1], [2], [3, 4, 5, 6, 7]] : tensor<2x4x8x16x32x64x128x256xf32> into tensor<8x8x1073741824xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<8x8x1073741824xf32>
@@ -106,7 +106,7 @@
!type = tensor<2x4x8x16x64x64x128x256xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse4() -> !type {
+util.func public @collapse4() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -123,13 +123,13 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type
- return %result: !type
+ util.return %result: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>
-// CHECK-LABEL: func.func @collapse4
+// CHECK-LABEL: util.func public @collapse4
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1], [2], [3], [4], [5], [6, 7]] : tensor<2x4x8x16x64x64x128x256xf32> into tensor<8x8x16x64x64x32768xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<8x8x16x64x64x32768xf32>
@@ -141,7 +141,7 @@
!type = tensor<2x4x32x32x32x64x128x256xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse5() -> !type {
+util.func public @collapse5() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -163,14 +163,14 @@
^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
linalg.yield %arg1 : f32
} -> !type
- return %result: !type
+ util.return %result: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d2, d4, d5)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d3, d2, d1, d4, d5)>
-// CHECK-LABEL: func.func @collapse5
+// CHECK-LABEL: util.func public @collapse5
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1], [2], [3], [4], [5], [6, 7]] : tensor<2x4x32x32x32x64x128x256xf32> into tensor<8x32x32x32x64x32768xf32>
// CHECK: %[[IN1:.+]] = tensor.collapse_shape %[[INPUT1:.+]] {{\[}}[0, 1], [2], [3], [4], [5], [6, 7]] : tensor<2x4x32x32x32x64x128x256xf32> into tensor<8x32x32x32x64x32768xf32>
// CHECK: %[[IN2:.+]] = tensor.collapse_shape %[[INPUT2:.+]] {{\[}}[0, 1], [2], [3], [4], [5], [6, 7]] : tensor<2x4x32x32x32x64x128x256xf32> into tensor<8x32x32x32x64x32768xf32>
@@ -184,7 +184,7 @@
!type = tensor<32x2x4x8x16x16x64x128xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse6() -> !type {
+util.func public @collapse6() -> !type {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -201,13 +201,13 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type
- return %result: !type
+ util.return %result: !type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4, d3, d5)>
-// CHECK-LABEL: func.func @collapse6
+// CHECK-LABEL: util.func public @collapse6
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0], [1], [2, 3], [4], [5], [6, 7]] : tensor<32x2x4x8x16x16x64x128xf32> into tensor<32x2x32x16x16x8192xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<32x2x32x16x16x8192xf32>
@@ -220,7 +220,7 @@
!type_out = tensor<2x4x8x16xf32>
!type_in = tensor<2x4x8xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type_in
-func.func @collapse7() -> !type_out {
+util.func public @collapse7() -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type_in>
@@ -236,12 +236,12 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type_out
- return %result: !type_out
+ util.return %result: !type_out
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
-// CHECK-LABEL: func.func @collapse7
+// CHECK-LABEL: util.func public @collapse7
// CHECK: %[[IN:.+]] = tensor.collapse_shape %[[INPUT:.+]] {{\[}}[0, 1, 2]] : tensor<2x4x8xf32> into tensor<64xf32>
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: %[[OUT:.+]] = tensor.empty() : tensor<64x16xf32>
@@ -253,7 +253,7 @@
!type_in = tensor<16x4x32x2xf32>
!type_out = tensor<8x16x4x32x8x2xf32>
-func.func @collapse8(%input : !type_in) -> !type_out {
+util.func public @collapse8(%input : !type_in) -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%output = tensor.empty() : !type_out
@@ -268,12 +268,12 @@
%11 = arith.addf %arg1, %arg2 : f32
linalg.yield %11 : f32
} -> !type_out
- return %6: !type_out
+ util.return %6: !type_out
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-// CHECK-LABEL: func.func @collapse8
+// CHECK-LABEL: util.func public @collapse8
// CHECK-SAME: (%[[IN:.+]]: tensor<16x4x32x2xf32>)
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[IN]] {{\[}}[0, 1, 2], [3]{{\]}}
// CHECK: %[[RES:.+]] = flow.dispatch.region
@@ -286,7 +286,7 @@
!type_in = tensor<16x4xf32>
!type_out = tensor<16x32x4xf32>
-func.func @dont_collapse() -> !type_out {
+util.func public @dont_collapse() -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input = tensor.empty() : !type_in
@@ -301,9 +301,9 @@
%11 = arith.addf %arg1, %arg2 : f32
linalg.yield %11 : f32
} -> !type_out
- return %6: !type_out
+ util.return %6: !type_out
}
-// CHECK-LABEL: func.func @dont_collapse
+// CHECK-LABEL: util.func public @dont_collapse
// CHECK: linalg.generic {indexing_maps = [#[[$MAP:.+]], #[[$MAP2:.+]]], iterator_types = ["parallel", "parallel", "parallel"]}
// -----
@@ -312,7 +312,7 @@
!type_out = tensor<2x4x16x64x32x128x256xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type_in
-func.func @collapse9() -> !type_out {
+util.func public @collapse9() -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type_in>
@@ -329,13 +329,13 @@
^bb0(%arg1: f32, %arg2: f32):
linalg.yield %arg1 : f32
} -> !type_out
- return %result: !type_out
+ util.return %result: !type_out
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d3, d5)>
-// CHECK-LABEL: func.func @collapse9
+// CHECK-LABEL: util.func public @collapse9
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel", "parallel", "parallel"]}
@@ -345,7 +345,7 @@
!type_in = tensor<10x10x30xf32>
!type_out = tensor<20x10x10x30x20xf32>
-func.func @collapse10(%input : !type_in) -> !type_out {
+util.func public @collapse10(%input : !type_in) -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%output = tensor.empty() : !type_out
@@ -360,10 +360,10 @@
linalg.yield %arg1 : f32
} -> !type_out
- return %result: !type_out
+ util.return %result: !type_out
}
-// CHECK-LABEL: func.func @collapse10
+// CHECK-LABEL: util.func public @collapse10
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel"]}
@@ -372,7 +372,7 @@
!type_in = tensor<10x20xf32>
!type_out = tensor<10x20xf32>
-func.func @collapse11(%input : !type_in) -> !type_out {
+util.func public @collapse11(%input : !type_in) -> !type_out {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%output = tensor.empty() : !type_out
@@ -386,19 +386,19 @@
linalg.yield %arg1 : f32
} -> !type_out
- return %result: !type_out
+ util.return %result: !type_out
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0)>
-// CHECK-LABEL: func.func @collapse11
+// CHECK-LABEL: util.func public @collapse11
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel"]}
// -----
!type = tensor<16x32xi32>
-func.func @dont_collapse_dueto_index(%height : index, %width : index) -> !type {
+util.func public @dont_collapse_dueto_index(%height : index, %width : index) -> !type {
%init_source = tensor.empty() : !type
%source = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
@@ -412,10 +412,10 @@
%linearized_i32 = arith.index_cast %linearized : index to i32
linalg.yield %linearized_i32 : i32
} -> !type
- return %source : !type
+ util.return %source : !type
}
-// CHECK-LABEL: func.func @dont_collapse
+// CHECK-LABEL: util.func public @dont_collapse
// CHECK: linalg.generic {indexing_maps = [#[[$MAP:.+]]], iterator_types = ["parallel", "parallel"]}
// -----
@@ -423,7 +423,7 @@
!type = tensor<2x4x8x16x32x64xf32>
util.global private @"__transpose_10_input" {inlining_policy = #util.inline.never} = dense<1.0> : !type
-func.func @collapse12() -> (!type,!type,!type,!type) {
+util.func public @collapse12() -> (!type,!type,!type,!type) {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%input_ptr = util.global.address @"__transpose_10_input" : !util.ptr<!type>
@@ -449,17 +449,17 @@
%3 = arith.addf %2, %arg5 : f32
linalg.yield %0,%1,%2,%3 : f32, f32, f32, f32
} -> (!type,!type,!type,!type)
- return %6, %7, %8, %9 : !type,!type,!type,!type
+ util.return %6, %7, %8, %9 : !type,!type,!type,!type
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0)>
-// CHECK-LABEL: func.func @collapse12
+// CHECK-LABEL: util.func public @collapse12
// CHECK: %[[RES:.+]] = flow.dispatch.region
// CHECK: linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]], #[[$MAP]], #[[$MAP]], #[[$MAP]]], iterator_types = ["parallel"]}
// -----
-func.func @multi_reduce_dim(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
+util.func public @multi_reduce_dim(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
%cst = arith.constant -0.000000e+00 : f32
%1 = tensor.empty() : tensor<2x32xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x32xf32>) -> tensor<2x32xf32>
@@ -469,7 +469,7 @@
linalg.yield %6 : f32
} -> tensor<2x32xf32>
%4 = tensor.expand_shape %3 [[0], [1, 2, 3]] : tensor<2x32xf32> into tensor<2x32x1x1xf32>
- return %4 : tensor<2x32x1x1xf32>
+ util.return %4 : tensor<2x32x1x1xf32>
}
// Check that we collapse dimensions.
@@ -491,7 +491,7 @@
// Collapsing is not supported when an input is broadcasted; we can't collapse
// the input from tensor<4xf32> to tensor<32xf32> for example.
-func.func @input_broadcast(%arg0: tensor<4x8xf32>, %arg1: tensor<4xf32>) -> tensor<f32> {
+util.func public @input_broadcast(%arg0: tensor<4x8xf32>, %arg1: tensor<4xf32>) -> tensor<f32> {
%empty = tensor.empty() : tensor<f32>
%reduce = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> ()>], iterator_types = ["reduction", "reduction"]} ins(%arg0, %arg1 : tensor<4x8xf32>, tensor<4xf32>) outs(%empty : tensor<f32>) {
^bb0(%arg2: f32, %arg3: f32, %out: f32):
@@ -499,7 +499,7 @@
%add = arith.addf %out, %div : f32
linalg.yield %add : f32
} -> tensor<f32>
- return %reduce : tensor<f32>
+ util.return %reduce : tensor<f32>
}
// CHECK: @input_broadcast
@@ -515,7 +515,7 @@
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
module {
- func.func @quantized_matmul(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>) -> tensor<1x1x4096xf32> {
+ util.func public @quantized_matmul(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>) -> tensor<1x1x4096xf32> {
%cst = arith.constant dense_resource<__elided__> : tensor<4096x32xf32>
%cst_0 = arith.constant dense_resource<__elided__> : tensor<4096x32xf32>
%0 = flow.dispatch.region -> (tensor<1x1x4096xf32>) {
@@ -539,23 +539,23 @@
} -> tensor<1x1x4096xf32>
flow.return %5 : tensor<1x1x4096xf32>
}
- return %0 : tensor<1x1x4096xf32>
+ util.return %0 : tensor<1x1x4096xf32>
}
}
-// CHECK-LABEL: func.func @quantized_matmul
+// CHECK-LABEL: util.func public @quantized_matmul
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.region
// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]
// CHECK: flow.return
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
module {
- func.func @batchnorm_failure_repro(%arg0 : tensor<2x4xf32>, %arg1 : tensor<4xf32>) -> tensor<2x4xf32> {
+ util.func public @batchnorm_failure_repro(%arg0 : tensor<2x4xf32>, %arg1 : tensor<4xf32>) -> tensor<2x4xf32> {
%0 = tensor.empty() : tensor<2x4xf32>
%1 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
@@ -565,20 +565,20 @@
%2 = arith.addf %b0, %b1 : f32
linalg.yield %2 : f32
} -> tensor<2x4xf32>
- return %1 : tensor<2x4xf32>
+ util.return %1 : tensor<2x4xf32>
}
}
-// CHECK-LABEL: func @batchnorm_failure_repro
+// CHECK-LABEL: util.func public @batchnorm_failure_repro
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.region
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
module {
- func.func @catch_invalid_collapse(%arg0 : tensor<10x20x30xf32>) -> tensor<10x30x40xf32> {
+ util.func public @catch_invalid_collapse(%arg0 : tensor<10x20x30xf32>) -> tensor<10x30x40xf32> {
%0 = tensor.empty() : tensor<10x30x40xf32>
%1 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>],
@@ -587,9 +587,9 @@
^bb0(%b0 : f32, %b1 : f32):
linalg.yield %b0 : f32
} -> tensor<10x30x40xf32>
- return %1 : tensor<10x30x40xf32>
+ util.return %1 : tensor<10x30x40xf32>
}
}
-// CHECK-LABEL: func @catch_invalid_collapse
+// CHECK-LABEL: util.func public @catch_invalid_collapse
// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_reduction.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_reduction.mlir
index 320cb4d..3a87df7 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_reduction.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/collapse_reduction.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt --split-input-file -iree-flow-collapse-dims %s | FileCheck %s
-func.func @multi_reduce_dim(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
+util.func public @multi_reduce_dim(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
%cst = arith.constant -0.000000e+00 : f32
%1 = tensor.empty() : tensor<2x32xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x32xf32>) -> tensor<2x32xf32>
@@ -10,7 +10,7 @@
linalg.yield %6 : f32
} -> tensor<2x32xf32>
%4 = tensor.expand_shape %3 [[0], [1, 2, 3]] : tensor<2x32xf32> into tensor<2x32x1x1xf32>
- return %4 : tensor<2x32x1x1xf32>
+ util.return %4 : tensor<2x32x1x1xf32>
}
// Check that we collapse dimensions.
@@ -22,7 +22,7 @@
// Collapsing is not supported when an input is broadcasted; we can't collapse
// the input from tensor<4xf32> to tensor<32xf32> for example.
-func.func @input_broadcast(%arg0: tensor<4x8xf32>, %arg1: tensor<4xf32>) -> tensor<f32> {
+util.func public @input_broadcast(%arg0: tensor<4x8xf32>, %arg1: tensor<4xf32>) -> tensor<f32> {
%empty = tensor.empty() : tensor<f32>
%reduce = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> ()>], iterator_types = ["reduction", "reduction"]} ins(%arg0, %arg1 : tensor<4x8xf32>, tensor<4xf32>) outs(%empty : tensor<f32>) {
^bb0(%arg2: f32, %arg3: f32, %out: f32):
@@ -30,7 +30,7 @@
%add = arith.addf %out, %div : f32
linalg.yield %add : f32
} -> tensor<f32>
- return %reduce : tensor<f32>
+ util.return %reduce : tensor<f32>
}
// CHECK: @input_broadcast
@@ -40,7 +40,7 @@
// Collapsing should not happen to ops in flow.dispatch.region or flow.dispatch.workgroups
-func.func @multi_reduce_dim_dispatch(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
+util.func public @multi_reduce_dim_dispatch(%arg0: tensor<2x32x10x4096xf32>) -> tensor<2x32x1x1xf32> {
%cst = arith.constant -0.000000e+00 : f32
%1 = tensor.empty() : tensor<2x32xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x32xf32>) -> tensor<2x32xf32>
@@ -53,7 +53,7 @@
flow.return %6 : tensor<2x32xf32>
}
%4 = tensor.expand_shape %3 [[0], [1, 2, 3]] : tensor<2x32xf32> into tensor<2x32x1x1xf32>
- return %4 : tensor<2x32x1x1xf32>
+ util.return %4 : tensor<2x32x1x1xf32>
}
// CHECK: @multi_reduce_dim_dispatch
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/convert_region_to_workgroups.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/convert_region_to_workgroups.mlir
index 19139b2..044dec3 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/convert_region_to_workgroups.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/convert_region_to_workgroups.mlir
@@ -1,8 +1,8 @@
// RUN: iree-opt %s --iree-flow-convert-region-to-workgroups -canonicalize -cse -split-input-file | FileCheck %s
-// CHECK-LABEL: func @foo(
+// CHECK-LABEL: util.func public @foo(
// CHECK: %[[argA:.*]]: tensor<?x?xf32>, %[[argB:.*]]: tensor<5x10xf32>, %[[argC:.*]]: tensor<10x11xf32>
-func.func @foo(%argA: tensor<?x?xf32>, %argB: tensor<5x10xf32>, %argC: tensor<10x11xf32>) -> (tensor<?x?xf32>, tensor<5x11xf32>) {
+util.func public @foo(%argA: tensor<?x?xf32>, %argB: tensor<5x10xf32>, %argC: tensor<10x11xf32>) -> (tensor<?x?xf32>, tensor<5x11xf32>) {
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[dim_argA_0:.*]] = tensor.dim %[[argA]], %[[c0]]
@@ -40,6 +40,6 @@
flow.return %2 : tensor<5x11xf32>
}
- // CHECK: return %[[r0]], %[[r1]]
- return %r0, %r1 : tensor<?x?xf32>, tensor<5x11xf32>
+ // CHECK: util.return %[[r0]], %[[r1]]
+ util.return %r0, %r1 : tensor<?x?xf32>, tensor<5x11xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
index c28ffef..8ce0a36 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --iree-flow-deduplicate-executables %s | FileCheck %s
// CHECK-LABEL: flow.executable public @single_executable_ex_0
-flow.executable @single_executable_ex_0 {
+flow.executable public @single_executable_ex_0 {
flow.executable.export @single_executable_entry_0
builtin.module {
func.func @single_executable_entry_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
@@ -10,18 +10,18 @@
}
}
}
-// CHECK-LABEL: func.func @single_executable
-func.func @single_executable(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-LABEL: util.func public @single_executable
+util.func public @single_executable(%arg0: tensor<4xf32>) -> tensor<4xf32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @single_executable_ex_0::@single_executable_entry_0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @single_executable_ex_0::@single_executable_entry_0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
// CHECK-LABEL: flow.executable public @duplicate_executables_ex_0
-flow.executable @duplicate_executables_ex_0 {
+flow.executable public @duplicate_executables_ex_0 {
flow.executable.export @duplicate_executables_entry_0
builtin.module {
func.func @duplicate_executables_entry_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
@@ -31,7 +31,7 @@
}
}
// CHECK-NOT: flow.executable public @duplicate_executables_ex_1
-flow.executable @duplicate_executables_ex_1 {
+flow.executable public @duplicate_executables_ex_1 {
flow.executable.export @duplicate_executables_entry_1
builtin.module {
func.func @duplicate_executables_entry_1(%arg0: tensor<4xf32>) -> tensor<4xf32> {
@@ -41,7 +41,7 @@
}
}
// CHECK-LABEL: flow.executable public @duplicate_executables_ex_2
-flow.executable @duplicate_executables_ex_2 {
+flow.executable public @duplicate_executables_ex_2 {
flow.executable.export @duplicate_executables_entry_2
builtin.module {
func.func @duplicate_executables_entry_2(%arg0: tensor<4xf32>) -> tensor<4xf32> {
@@ -50,8 +50,8 @@
}
}
}
-// CHECK-LABEL: func.func @duplicate_executables
-func.func @duplicate_executables(%arg0: tensor<4xf32>) {
+// CHECK-LABEL: util.func public @duplicate_executables
+util.func public @duplicate_executables(%arg0: tensor<4xf32>) {
%c4 = arith.constant 4 : index
// CHECK: = flow.dispatch @duplicate_executables_ex_0::@duplicate_executables_entry_0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @duplicate_executables_ex_0::@duplicate_executables_entry_0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
@@ -61,7 +61,7 @@
%2 = flow.dispatch @duplicate_executables_ex_2::@duplicate_executables_entry_2[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK: = flow.dispatch {@duplicate_executables_ex_0::@duplicate_executables_entry_0, @duplicate_executables_ex_0::@duplicate_executables_entry_0}
%3 = flow.dispatch {@duplicate_executables_ex_0::@duplicate_executables_entry_0, @duplicate_executables_ex_1::@duplicate_executables_entry_1}[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
- return
+ util.return
}
// Ensure that symbol renaming is done within initializers.
@@ -97,14 +97,14 @@
}
}
}
-// CHECK-LABEL: func.func @same_ops_diff_operands
-func.func @same_ops_diff_operands(%arg0: tensor<2xi32>, %arg1: tensor<2xi32>) -> tensor<2xi32> {
+// CHECK-LABEL: util.func public @same_ops_diff_operands
+util.func public @same_ops_diff_operands(%arg0: tensor<2xi32>, %arg1: tensor<2xi32>) -> tensor<2xi32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @same_ops_diff_operands_ex_0::@entry_0[%c4](%arg0, %arg1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32>
%0 = flow.dispatch @same_ops_diff_operands_ex_0::@entry_0[%c4](%arg0, %arg1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32>
// CHECK: %1 = flow.dispatch @same_ops_diff_operands_ex_1::@entry_1[%c4](%arg0, %arg1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32>
%1 = flow.dispatch @same_ops_diff_operands_ex_1::@entry_1[%c4](%arg0, %arg1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32>
- return %0 : tensor<2xi32>
+ util.return %0 : tensor<2xi32>
}
// -----
@@ -139,8 +139,8 @@
}
}
}
-// CHECK-LABEL: func.func @multiple_entry_points
-func.func @multiple_entry_points(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-LABEL: util.func public @multiple_entry_points
+util.func public @multiple_entry_points(%arg0: tensor<4xf32>) -> tensor<4xf32> {
// CHECK: %[[C4:.*]] = arith.constant 4
%c4 = arith.constant 4 : index
// CHECK: {{.*}} = flow.dispatch @multiple_entry_points_ex_0::@multiple_entry_points_0_entry_0[%[[C4]]](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
@@ -151,7 +151,7 @@
%2 = flow.dispatch @multiple_entry_points_ex_1::@multiple_entry_points_1_entry_0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK-NEXT: {{.*}} = flow.dispatch @multiple_entry_points_ex_0::@multiple_entry_points_0_entry_1[%[[C4]]](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%3 = flow.dispatch @multiple_entry_points_ex_1::@multiple_entry_points_1_entry_1[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
// -----
@@ -176,14 +176,14 @@
}
}
}
-// CHECK-LABEL: func.func @different_types
-func.func @different_types(%arg0: tensor<4xf32>) -> tensor<4xi1> {
+// CHECK-LABEL: util.func public @different_types
+util.func public @different_types(%arg0: tensor<4xf32>) -> tensor<4xi1> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @different_types_float_ex::@different_types_float_entry[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xi1>
%0 = flow.dispatch @different_types_float_ex::@different_types_float_entry[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xi1>
// CHECK: %1 = flow.dispatch @different_types_int_ex::@different_types_int_entry[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xi1>
%1 = flow.dispatch @different_types_int_ex::@different_types_int_entry[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xi1>
- return %0 : tensor<4xi1>
+ util.return %0 : tensor<4xi1>
}
// -----
@@ -234,8 +234,8 @@
}
}
}
-// CHECK-LABEL: func.func @nested_ops
-func.func @nested_ops(%arg0: tensor<5x6xf32>, %arg1: tensor<5x6xf32>) -> tensor<5x6xf32> {
+// CHECK-LABEL: util.func public @nested_ops
+util.func public @nested_ops(%arg0: tensor<5x6xf32>, %arg1: tensor<5x6xf32>) -> tensor<5x6xf32> {
%c4 = arith.constant 4 : index
// CHECK: %0 = flow.dispatch @nested_ops_ex_0::@nested_ops_entry_0[%c4](%arg0, %arg1) : (tensor<5x6xf32>, tensor<5x6xf32>) -> tensor<5x6xf32>
%0 = flow.dispatch @nested_ops_ex_0::@nested_ops_entry_0[%c4](%arg0, %arg1) : (tensor<5x6xf32>, tensor<5x6xf32>) -> tensor<5x6xf32>
@@ -243,7 +243,7 @@
%1 = flow.dispatch @nested_ops_ex_0::@nested_ops_entry_0[%c4](%arg0, %arg1) : (tensor<5x6xf32>, tensor<5x6xf32>) -> tensor<5x6xf32>
// CHECK: %2 = flow.dispatch @nested_ops_ex_2::@nested_ops_entry_2[%c4](%arg0, %arg1) : (tensor<5x6xf32>, tensor<5x6xf32>) -> tensor<5x6xf32>
%2 = flow.dispatch @nested_ops_ex_2::@nested_ops_entry_2[%c4](%arg0, %arg1) : (tensor<5x6xf32>, tensor<5x6xf32>) -> tensor<5x6xf32>
- return %0 : tensor<5x6xf32>
+ util.return %0 : tensor<5x6xf32>
}
// -----
@@ -417,13 +417,13 @@
}
}
-// CHECK-LABEL: func.func @dispatch_variants
-func.func @dispatch_variants(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-LABEL: util.func public @dispatch_variants
+util.func public @dispatch_variants(%arg0: tensor<4xf32>) -> tensor<4xf32> {
// CHECK: %[[C4:.*]] = arith.constant 4
%c4 = arith.constant 4 : index
// CHECK: {{.*}} = flow.dispatch @ex0::@variant::@dispatch[%[[C4]]](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @ex0::@variant::@dispatch[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK-NEXT: {{.*}} = flow.dispatch @ex0::@variant::@dispatch[%[[C4]]](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%1 = flow.dispatch @ex1::@variant::@dispatch[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
- return %1 : tensor<4xf32>
+ util.return %1 : tensor<4xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
index e77d1be..0b53662 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
@@ -1,12 +1,12 @@
-// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-clone-producers-into-dispatch-regions, iree-flow-form-dispatch-workgroups), cse, canonicalize, cse)" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-clone-producers-into-dispatch-regions, iree-flow-form-dispatch-workgroups), cse, canonicalize, cse)" %s | FileCheck %s
-func.func @tile_matmul_alone(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @tile_matmul_alone(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%1 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func.func @tile_matmul_alone
+// CHECK: util.func public @tile_matmul_alone
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
@@ -48,11 +48,11 @@
// CHECK-SAME: offsets = [0, 0], sizes = [%[[ARG10_W]], %[[ARG11_W]]], strides = [1, 1]
// CHECK: count(%[[W0:.+]]: index, %[[W1:.+]]: index, %[[W2:.+]]: index, %[[W3:.+]]: index, %[[W4:.+]]: index, %[[W5:.+]]: index)
// CHECK: %[[WX:.+]], %[[WY:.+]], %[[WZ:.+]] = flow.dispatch.workgroup_count_from_slice %[[W0]], %[[W1]], %[[W2]], %[[W3]], %[[W4]], %[[W5]]
-// CHECK: return %[[WX]], %[[WY]], %[[WZ]]
+// CHECK: flow.return %[[WX]], %[[WY]], %[[WZ]]
// -----
-func.func @generic_op_alone(%A: tensor<?x?xf32>, %B: tensor<?xf32>) -> tensor<?x?xf32> {
+util.func public @generic_op_alone(%A: tensor<?x?xf32>, %B: tensor<?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %A, %c0 : tensor<?x?xf32>
@@ -69,9 +69,9 @@
%2 = arith.addf %arg0, %arg1 : f32
linalg.yield %2 : f32
} -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func.func @generic_op_alone(
+// CHECK: util.func public @generic_op_alone(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -97,7 +97,7 @@
// -----
-func.func @fuse_matmul_with_fill(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @fuse_matmul_with_fill(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>) -> tensor<?x?xf32> {
%zero = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -107,9 +107,9 @@
%1 = linalg.fill ins(%zero : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = linalg.matmul ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %2 : tensor<?x?xf32>
+ util.return %2 : tensor<?x?xf32>
}
-// CHECK: func.func @fuse_matmul_with_fill
+// CHECK: util.func public @fuse_matmul_with_fill
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -146,7 +146,7 @@
// -----
-func.func @keep_separate_dispatches_for_producer(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @keep_separate_dispatches_for_producer(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>) -> tensor<?x?xf32> {
%zero = arith.constant 0.0 : f32
%one = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -168,9 +168,9 @@
} -> tensor<?x?xf32>
%4 = linalg.matmul ins(%3, %B : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %4 : tensor<?x?xf32>
+ util.return %4 : tensor<?x?xf32>
}
-// CHECK: func.func @keep_separate_dispatches_for_producer
+// CHECK: util.func public @keep_separate_dispatches_for_producer
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -202,7 +202,7 @@
// -----
-func.func @always_fuse_cast
+util.func public @always_fuse_cast
(%lhs : tensor<?x?xf32>, %rhs1 : tensor<4x?xf32>, %rhs2 : tensor<4x?xf32>)
-> (tensor<?x?xf32>, tensor<?x?xf32>)
{
@@ -223,10 +223,10 @@
%2= linalg.matmul
ins(%0, %rhs2 : tensor<?x4xf32>, tensor<4x?xf32>)
outs(%fill2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1, %2 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %1, %2 : tensor<?x?xf32>, tensor<?x?xf32>
}
-// CHECK: func.func @always_fuse_cast(
+// CHECK: util.func public @always_fuse_cast(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<4x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<4x?xf32>
@@ -244,11 +244,11 @@
// CHECK-SAME: (%[[ARG0]], %[[ARG2]], %[[M]], %[[K]], %[[N2]])
// CHECK: tensor.cast
// CHECK: flow.return
-// CHECK: return %[[RESULT1]], %[[RESULT2]]
+// CHECK: util.return %[[RESULT1]], %[[RESULT2]]
// -----
-func.func @dont_fuse_tensor_update_with_fill(
+util.func public @dont_fuse_tensor_update_with_fill(
%arg0: tensor<?x?xf32>, %arg1: tensor<f32>,
%arg2: index, %arg3: index, %arg4: index, %arg5: index)
-> tensor<?x?xf32> {
@@ -262,26 +262,26 @@
%5 = tensor.empty(%3, %4) : tensor<?x?xf32>
%6 = linalg.fill ins(%0 : f32) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
%7 = flow.tensor.update %arg0, %6[%arg2, %arg3] : tensor<?x?xf32>{%1, %2} -> %6 as tensor<?x?xf32>{%3, %4}
- return %7 : tensor<?x?xf32>
+ util.return %7 : tensor<?x?xf32>
}
-// CHECK: func.func @dont_fuse_tensor_update_with_fill
+// CHECK: util.func public @dont_fuse_tensor_update_with_fill
// CHECK: %[[SPLAT:.+]] = flow.tensor.splat
// CHECK: flow.tensor.update %{{.+}}, %[[SPLAT]]
// -----
-func.func @pass_constant_through() -> tensor<2x2x3xi32> {
+util.func public @pass_constant_through() -> tensor<2x2x3xi32> {
%cst = arith.constant dense<[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32>
- return %cst : tensor<2x2x3xi32>
+ util.return %cst : tensor<2x2x3xi32>
}
-// CHECK-LABEL: func.func @pass_constant_through()
+// CHECK-LABEL: util.func public @pass_constant_through()
// CHECK: %[[CST:.+]] = arith.constant dense<{{.+}}> : tensor<2x2x3xi32>
-// CHECK: return %[[CST]]
+// CHECK: util.return %[[CST]]
// -----
-func.func @fuse_matmul_with_generic_op(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
+util.func public @fuse_matmul_with_generic_op(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
-> tensor<?x?xf32> {
%f12 = arith.constant 12.0 : f32
@@ -295,9 +295,9 @@
%D = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%CC: tensor<?x?xf32>) -> tensor<?x?xf32>
- return %D: tensor<?x?xf32>
+ util.return %D: tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @fuse_matmul_with_generic_op
+// CHECK-LABEL: util.func public @fuse_matmul_with_generic_op
// linalg.generic is fused inside the dispatch region and becomes dead.
// CHECK-NOT: generic
// CHECK: flow.dispatch.workgroups
@@ -306,7 +306,7 @@
// -----
-func.func @keep_original_producer_uses(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
+util.func public @keep_original_producer_uses(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>)
-> (tensor<?x?xf32>, tensor<?x?xf32>) {
%f12 = arith.constant 12.0 : f32
@@ -323,9 +323,9 @@
%D = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%CC: tensor<?x?xf32>) -> tensor<?x?xf32>
- return %D, %CC: tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %D, %CC: tensor<?x?xf32>, tensor<?x?xf32>
}
-// CHECK: func.func @keep_original_producer_uses
+// CHECK: util.func public @keep_original_producer_uses
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
@@ -347,11 +347,11 @@
// CHECK-SAME: outs(%[[STOREVAL]] : tensor<?x?xf32>)
// CHECK-DAG: flow.dispatch.tensor.store %[[STOREVAL]], %[[RESULT_CAPTURE]]
// CHECK-DAG: flow.dispatch.tensor.store %[[GEMM]], %[[ARG2_CAPTURE]]
-// CHECK: return %[[origCC]]#0, %[[origCC]]#1
+// CHECK: util.return %[[origCC]]#0, %[[origCC]]#1
// -----
-func.func @conv2d(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>) -> tensor<1x112x112x32xf32> {
+util.func public @conv2d(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>) -> tensor<1x112x112x32xf32> {
%0 = tensor.empty() : tensor<1x112x112x32xf32>
%cst = arith.constant 0.000000e+00 : f32
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
@@ -360,40 +360,40 @@
ins(%input, %filter : tensor<1x225x225x16xf32>, tensor<3x3x16x32xf32>)
outs(%1 : tensor<1x112x112x32xf32>)
-> tensor<1x112x112x32xf32>
- return %2 : tensor<1x112x112x32xf32>
+ util.return %2 : tensor<1x112x112x32xf32>
}
-// CHECK-LABEL: func.func @conv2d
+// CHECK-LABEL: util.func public @conv2d
// CHECK: %[[RESULT:.+]] = flow.dispatch.workgroups
// CHECK: linalg.conv_2d_nhwc_hwcf
// CHECK: flow.return
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @depthwise_conv2d(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
+util.func public @depthwise_conv2d(%input: tensor<1x113x113x96xf32>, %filter: tensor<3x3x96xf32>) -> tensor<1x56x56x96xf32> {
%cst = arith.constant 0.000000e+00 : f32
%1 = tensor.empty() : tensor<1x56x56x96xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
%4 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) outs(%2 : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
- return %4 : tensor<1x56x56x96xf32>
+ util.return %4 : tensor<1x56x56x96xf32>
}
-// CHECK-LABEL: func.func @depthwise_conv2d
+// CHECK-LABEL: util.func public @depthwise_conv2d
// CHECK: %[[RESULT:.+]] = flow.dispatch.workgroups
// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
// CHECK: flow.return
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @subtensor_insert(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
+util.func public @subtensor_insert(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
%arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index) -> tensor<?x?xf32> {
%0 = tensor.insert_slice %arg0 into
%arg1[%arg2, %arg3] [%arg4, %arg5] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK: func.func @subtensor_insert
+// CHECK: util.func public @subtensor_insert
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index
@@ -438,11 +438,11 @@
// CHECK-SAME: offsets = [%[[ARG2_W]], %[[ARG3_W]]]
// CHECK-SAME: sizes = [%[[ARG4_W]], %[[ARG5_W]]]
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%[[ARG1_D0_W]], %[[ARG1_D1_W]]}
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @fuse_non_tiled_reduction_fill(%input1: tensor<1000xf32>, %input2: tensor<1000xf32>, %offset: tensor<f32>) -> tensor<f32> {
+util.func public @fuse_non_tiled_reduction_fill(%input1: tensor<1000xf32>, %input2: tensor<1000xf32>, %offset: tensor<f32>) -> tensor<f32> {
%zero = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<f32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<f32>) -> tensor<f32>
@@ -458,10 +458,10 @@
%558 = arith.addf %557, %arg4 : f32
linalg.yield %558 : f32
} -> tensor<f32>
- return %reduce : tensor<f32>
+ util.return %reduce : tensor<f32>
}
-// CHECK-LABEL: func.func @fuse_non_tiled_reduction_fill
+// CHECK-LABEL: util.func public @fuse_non_tiled_reduction_fill
// CHECK: flow.dispatch.workgroups({{.+}}) : (tensor<1000xf32>, tensor<1000xf32>, tensor<f32>) -> tensor<f32> =
// CHECK-NEXT: (%[[INPUT1:[a-z0-9]+]]: !flow.dispatch.tensor<readonly:tensor<1000xf32>>,
@@ -481,7 +481,7 @@
#map0 = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
-func.func @inline_dag_1(
+util.func public @inline_dag_1(
%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<i32>,
%arg3 : index) -> tensor<1x?xf32> {
%0 = tensor.cast %arg0 : tensor<?x?xf32> to tensor<1x?xf32>
@@ -507,9 +507,9 @@
%14 = arith.addf %12, %13 : f32
linalg.yield %14 : f32
} -> tensor<1x?xf32>
- return %9 : tensor<1x?xf32>
+ util.return %9 : tensor<1x?xf32>
}
-// CHECK-LABEL: func.func @inline_dag_1
+// CHECK-LABEL: util.func public @inline_dag_1
// CHECK-NOT: linalg.
// CHECK-NOT: tensor.extract_slice
// CHECK: flow.dispatch.workgroups
@@ -539,7 +539,7 @@
#map0 = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
-func.func @inline_dag_2(
+util.func public @inline_dag_2(
%arg0: tensor<?x?xf32>, %arg1 : tensor<1x?xf32>, %arg2 : tensor<i32>,
%arg3 : index) -> tensor<1x?xf32> {
%0 = tensor.cast %arg0 : tensor<?x?xf32> to tensor<1x?xf32>
@@ -567,9 +567,9 @@
%14 = arith.addf %12, %13 : f32
linalg.yield %14 : f32
} -> tensor<1x?xf32>
- return %9 : tensor<1x?xf32>
+ util.return %9 : tensor<1x?xf32>
}
-// CHECK-LABEL: func.func @inline_dag_2
+// CHECK-LABEL: util.func public @inline_dag_2
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1x?xf32>
// CHECK: flow.dispatch.workgroups
@@ -595,7 +595,7 @@
// -----
-func.func @inline_dag_3(%240 : tensor<9xi32>, %244 : tensor<18xi32>, %247 : tensor<i32>) -> tensor<9xi1> {
+util.func public @inline_dag_3(%240 : tensor<9xi32>, %244 : tensor<18xi32>, %247 : tensor<i32>) -> tensor<9xi1> {
%c9 = arith.constant 9 : index
%c5_i32 = arith.constant 5 : i32
%c0_i32 = arith.constant 0 : i32
@@ -617,9 +617,9 @@
%849 = arith.cmpi eq, %arg20, %c5_i32 : i32
linalg.yield %849 : i1
} -> tensor<9xi1>
- return %256 : tensor<9xi1>
+ util.return %256 : tensor<9xi1>
}
-// CHECK: func.func @inline_dag_3
+// CHECK: util.func public @inline_dag_3
// CHECK-SAME: %[[ARG0:.+]]: tensor<9xi32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<18xi32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<i32>
@@ -648,7 +648,7 @@
// -----
#map = affine_map<() -> ()>
-func.func @inline_dag_4(%arg0: tensor<4xi32>, %arg1: tensor<i32>) -> tensor<i16> {
+util.func public @inline_dag_4(%arg0: tensor<4xi32>, %arg1: tensor<i32>) -> tensor<i16> {
%c3_i32 = arith.constant 3 : i32
%c0_i32 = arith.constant 0 : i32
%0 = tensor.extract %arg1[] : tensor<i32>
@@ -666,9 +666,9 @@
%9 = arith.trunci %arg2 : i32 to i16
linalg.yield %9 : i16
} -> tensor<i16>
- return %8 : tensor<i16>
+ util.return %8 : tensor<i16>
}
-// CHECK-LABEL: func.func @inline_dag_4
+// CHECK-LABEL: util.func public @inline_dag_4
// CHECK-SAME: %[[ARG0:.+]]: tensor<4xi32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<i32>
// CHECK: flow.dispatch.workgroups
@@ -698,7 +698,7 @@
// -----
-func.func @multi_result(%arg0: tensor<?x?xi32>, %arg1: tensor<?x?xi32>) -> (tensor<?xi32>, tensor<?xi32>) {
+util.func public @multi_result(%arg0: tensor<?x?xi32>, %arg1: tensor<?x?xi32>) -> (tensor<?xi32>, tensor<?xi32>) {
%cmin = arith.constant -2147483648 : i32
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
@@ -724,22 +724,22 @@
%11 = arith.select %7, %9, %10 : i32
linalg.yield %6, %11 : i32, i32
} -> (tensor<?xi32>, tensor<?xi32>)
- return %4#0, %4#1 : tensor<?xi32>, tensor<?xi32>
+ util.return %4#0, %4#1 : tensor<?xi32>, tensor<?xi32>
}
-// CHECK-LABEL: func.func @multi_result
+// CHECK-LABEL: util.func public @multi_result
// CHECK: %[[RESULT_OUT:.+]]:2 = flow.dispatch.workgroups
// CHECK-NEXT: %[[ARG5:[a-zA-Z0-9_]+]]: !flow.dispatch.tensor<writeonly:tensor<?xi32>>
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: !flow.dispatch.tensor<writeonly:tensor<?xi32>>
// CHECK: %[[RESULT:.+]]:2 = linalg.generic
// CHECK-DAG: flow.dispatch.tensor.store %[[RESULT]]#0, %[[ARG5]]
// CHECK-DAG: flow.dispatch.tensor.store %[[RESULT]]#1, %[[ARG6]]
-// CHECK: return %[[RESULT_OUT]]#0, %[[RESULT_OUT]]#1
+// CHECK: util.return %[[RESULT_OUT]]#0, %[[RESULT_OUT]]#1
// -----
// TODO: Maybe this test is now not needed anymore.
-func.func @dynamic_slice(%arg0: tensor<?x?xi32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3 : index) -> tensor<1x?xi32> {
+util.func public @dynamic_slice(%arg0: tensor<?x?xi32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3 : index) -> tensor<1x?xi32> {
%c1_i32 = arith.constant 1 : i32
%c0_i32 = arith.constant 0 : i32
%0 = tensor.extract %arg1[] : tensor<i32>
@@ -755,9 +755,9 @@
%10 = arith.select %9, %8, %c0_i32 : i32
%11 = arith.index_cast %10 : i32 to index
%12 = tensor.extract_slice %arg0[%5, %11] [1, %arg3] [1, 1] : tensor<?x?xi32> to tensor<1x?xi32>
- return %12 : tensor<1x?xi32>
+ util.return %12 : tensor<1x?xi32>
}
-// CHECK-LABEL: func.func @dynamic_slice(
+// CHECK-LABEL: util.func public @dynamic_slice(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<i32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<i32>
@@ -786,11 +786,11 @@
// CHECK: flow.dispatch.tensor.load %[[ARG0_CAPTURE]]
// CHECK: flow.dispatch.tensor.store %{{.*}}, %[[DEST_CAPTURE]]
// CHECK: flow.return
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @dynamic_dot() -> tensor<?x?xf32> {
+util.func public @dynamic_dot() -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
@@ -801,9 +801,9 @@
%4 = tensor.empty(%2, %3) : tensor<?x?xf32>
%5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
%6 = linalg.matmul ins(%0, %1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%5 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %6 : tensor<?x?xf32>
+ util.return %6 : tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @dynamic_dot()
+// CHECK-LABEL: util.func public @dynamic_dot()
// CHECK-NOT: linalg.fill
// CHECK-NOT: linalg.matmul
// CHECK: flow.dispatch.workgroups
@@ -812,11 +812,11 @@
// CHECK: flow.return
// CHECK-NOT: linalg.fill
// CHECK-NOT: linalg.matmul
-// CHECK: return
+// CHECK: util.return
// -----
-func.func @scatter(
+util.func public @scatter(
%original : tensor<?x?xf32>, %indices : tensor<?x1xi32>,
%update : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = iree_linalg_ext.scatter
@@ -828,9 +828,9 @@
%1 = arith.addf %arg0, %arg1 : f32
iree_linalg_ext.yield %1 : f32
} -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK: func.func @scatter(
+// CHECK: util.func public @scatter(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x1xi32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
@@ -855,11 +855,11 @@
// CHECK-SAME: ins(%[[UPDATE]], %[[INDICES]] : tensor<?x?xf32>, tensor<?x1xi32>)
// CHECK-SAME: outs(%[[ORIGINAL]] : tensor<?x?xf32>)
// CHECK: flow.dispatch.tensor.store %[[SCATTER]], %[[ARG0_CAPTURE]]
-// CHECK: return %[[RESULT]] : tensor<?x?xf32>
+// CHECK: util.return %[[RESULT]] : tensor<?x?xf32>
// -----
-func.func @sort_3d(%arg0: tensor<?x?x?xi32>, %arg1 : tensor<?x?x?xf32>)
+util.func public @sort_3d(%arg0: tensor<?x?x?xi32>, %arg1 : tensor<?x?x?xf32>)
-> (tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
%0, %1 = iree_linalg_ext.sort dimension(0)
outs(%arg0, %arg1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
@@ -867,9 +867,9 @@
%2 = arith.cmpf ogt, %arg4, %arg5 : f32
iree_linalg_ext.yield %2 : i1
} -> tensor<?x?x?xi32>, tensor<?x?x?xf32>
- return %0, %1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>
+ util.return %0, %1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>
}
-// CHECK: func.func @sort_3d(
+// CHECK: util.func public @sort_3d(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -906,11 +906,11 @@
// CHECK-SAME: offsets = [0, 0, 0], sizes = [%[[ARG1_D0_W]], %[[ARG1_D1_W]], %[[ARG1_D2_W]]]
// CHECK: flow.return
// CHECK: }
-// CHECK: return %[[RESULT_OUT]]#0, %[[RESULT_OUT]]#1
+// CHECK: util.return %[[RESULT_OUT]]#0, %[[RESULT_OUT]]#1
// -----
-func.func @scatter_static(%arg0 : tensor<4xi32>, %arg1 : tensor<4x1xi32>, %arg2 : tensor<8xi32>)
+util.func public @scatter_static(%arg0 : tensor<4xi32>, %arg1 : tensor<4x1xi32>, %arg2 : tensor<8xi32>)
-> tensor<8xi32>{
%cst = arith.constant dense<[0, 9, 0, 10, 11, 0, 0, 12]> : tensor<8xi32>
%cst_0 = arith.constant dense<[9, 10, 11, 12]> : tensor<4xi32>
@@ -924,9 +924,9 @@
^bb0(%arg3: i32, %arg4: i32): // no predecessors
iree_linalg_ext.yield %arg3 : i32
} -> tensor<8xi32>
- return %0 : tensor<8xi32>
+ util.return %0 : tensor<8xi32>
}
-// CHECK: func.func @scatter_static
+// CHECK: util.func public @scatter_static
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<4xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<4x1xi32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<8xi32>
@@ -936,22 +936,22 @@
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: !flow.dispatch.tensor<readwrite:tensor<8xi32>>
// CHECK: %[[SCATTER_TILE:.+]] = iree_linalg_ext.scatter
// CHECK: flow.dispatch.tensor.store %[[SCATTER_TILE]], %[[ARG5]], offsets = [0], sizes = [8], strides = [1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
// Check that we are distributing along the last three dimensions for NHWC-output pooling op.
-func.func @pooling_nwhc_sum_static(%input: tensor<1x33x33x160xf32>) -> tensor<1x3x3x160xf32> {
+util.func public @pooling_nwhc_sum_static(%input: tensor<1x33x33x160xf32>) -> tensor<1x3x3x160xf32> {
%cst = arith.constant 0.0 : f32
%1 = tensor.empty() : tensor<1x3x3x160xf32>
%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x3x3x160xf32>) -> tensor<1x3x3x160xf32>
%3 = tensor.empty() : tensor<11x11xf32>
%4 = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<11> : vector<2xi64>} ins(%input, %3 : tensor<1x33x33x160xf32>, tensor<11x11xf32>) outs(%2 : tensor<1x3x3x160xf32>) -> tensor<1x3x3x160xf32>
- return %4 : tensor<1x3x3x160xf32>
+ util.return %4 : tensor<1x3x3x160xf32>
}
-// CHECK-LABEL: func.func @pooling_nwhc_sum_static
+// CHECK-LABEL: util.func public @pooling_nwhc_sum_static
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.workgroups(
// CHECK-DAG: %[[INPUT:.+]] = flow.dispatch.tensor.load
// CHECK-DAG: %[[EMPTY0:.+]] = tensor.empty() : tensor<1x3x3x160xf32>
@@ -963,11 +963,11 @@
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.dispatch.tensor.store %[[POOL]]
// CHECK: flow.return
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
-func.func @named_op_outs_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @named_op_outs_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst1 = arith.constant -1.0 : f64
@@ -980,9 +980,9 @@
outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%matmul = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %matmul : tensor<?x?xf32>
+ util.return %matmul : tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @named_op_outs_fusion
+// CHECK-LABEL: util.func public @named_op_outs_fusion
// CHECK: flow.dispatch.workgroups
// CHECK: %[[FILL:.+]] = linalg.fill_rng_2d
// CHECK: linalg.matmul
@@ -990,7 +990,7 @@
// -----
-func.func @dynamic_slice(%arg0 : i32, %arg1 : i32, %arg2 : tensor<?xi32>,
+util.func public @dynamic_slice(%arg0 : i32, %arg1 : i32, %arg2 : tensor<?xi32>,
%arg3 : tensor<?x?xi32>) -> tensor<?x?xi32>{
%c0 = arith.constant 0 : index
%c0_i32 = arith.constant 0 : i32
@@ -1008,9 +1008,9 @@
%d0 = tensor.dim %arg2, %c0 : tensor<?xi32>
%17 = tensor.insert_slice %arg2 into
%arg3[%9, %15] [1, %d0] [1, 1] : tensor<?xi32> into tensor<?x?xi32>
- return %17 : tensor<?x?xi32>
+ util.return %17 : tensor<?x?xi32>
}
-// CHECK-LABEL: func.func @dynamic_slice
+// CHECK-LABEL: util.func public @dynamic_slice
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: i32
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: i32
// CHECK-SAME: %[[ARG2:.+]]: tensor<?xi32>
@@ -1025,13 +1025,13 @@
// -----
-func.func @extract_slice(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
+util.func public @extract_slice(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
%arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> tensor<?x?xf32> {
%0 = tensor.extract_slice %arg0[%arg1, %arg2] [%arg3, %arg4] [%arg5, %arg6] :
tensor<?x?xf32> to tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK: func.func @extract_slice
+// CHECK: util.func public @extract_slice
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
@@ -1070,18 +1070,18 @@
// -----
// TODO(ravishankarm): Enable after upstream pad op tiling issues are addressed.
-// func.func @tensor.pad(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
+// util.func public @tensor.pad(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
// %arg3 : index, %arg4 : index, %arg5 : f32) -> tensor<?x?xf32> {
// %0 = tensor.pad %arg0 low[%arg1, %arg2] high[%arg3, %arg4] {
// ^bb0(%arg6 : index, %arg7 : index):
// tensor.yield %arg5 : f32
// } : tensor<?x?xf32> to tensor<?x?xf32>
-// return %0 : tensor<?x?xf32>
+// util.return %0 : tensor<?x?xf32>
// }
// -----
-func.func @inline_cst(%arg0 : tensor<4x32xi32>) -> tensor<32xi32> {
+util.func public @inline_cst(%arg0 : tensor<4x32xi32>) -> tensor<32xi32> {
%cst = arith.constant dense<0> : tensor<32xi32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>],
@@ -1091,16 +1091,16 @@
%1 = arith.addi %arg1, %arg2 : i32
linalg.yield %1 : i32
} -> tensor<32xi32>
- return %0 : tensor<32xi32>
+ util.return %0 : tensor<32xi32>
}
-// CHECK: func.func @inline_cst(%[[ARG0:.+]]: tensor<4x32xi32>)
+// CHECK: util.func public @inline_cst(%[[ARG0:.+]]: tensor<4x32xi32>)
// CHECK: flow.dispatch.workgroups
// CHECK-SAME: (%[[ARG0]])
// CHECK: %[[CST:.+]] = arith.constant dense<0> : tensor<32xi32>
// -----
-func.func @inline_cst2(%arg0 : tensor<4x2xi32>) -> tensor<2xi32> {
+util.func public @inline_cst2(%arg0 : tensor<4x2xi32>) -> tensor<2xi32> {
%cst = arith.constant dense<[21, 42]> : tensor<2xi32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>],
@@ -1110,9 +1110,9 @@
%1 = arith.addi %arg1, %arg2 : i32
linalg.yield %1 : i32
} -> tensor<2xi32>
- return %0 : tensor<2xi32>
+ util.return %0 : tensor<2xi32>
}
-// CHECK-LABEL: func.func @inline_cst2(
+// CHECK-LABEL: util.func public @inline_cst2(
// CHECK-SAME: %[[ARG0:.+]]: tensor<4x2xi32>)
// CHECK: flow.dispatch.workgroups
// CHECK-SAME: (%[[ARG0]])
@@ -1120,14 +1120,14 @@
// -----
-func.func @gemm_unitN(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x1xf32>,
+util.func public @gemm_unitN(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x1xf32>,
%arg2 : tensor<?x1xf32>) -> tensor<?x1xf32> {
%0 = linalg.matmul
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x1xf32>)
outs(%arg2 : tensor<?x1xf32>) -> tensor<?x1xf32>
- return %0 : tensor<?x1xf32>
+ util.return %0 : tensor<?x1xf32>
}
-// CHECK-LABEL: func.func @gemm_unitN(
+// CHECK-LABEL: util.func public @gemm_unitN(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>,
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x1xf32>,
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x1xf32>)
@@ -1141,27 +1141,27 @@
// -----
-func.func @gemm_unitM_unitN(%arg0 : tensor<1x1xf32>, %arg1 : tensor<1x1xf32>,
+util.func public @gemm_unitM_unitN(%arg0 : tensor<1x1xf32>, %arg1 : tensor<1x1xf32>,
%arg2 : tensor<1x1xf32>) -> tensor<1x1xf32> {
%0 = linalg.matmul
ins(%arg0, %arg1 : tensor<1x1xf32>, tensor<1x1xf32>)
outs(%arg2 : tensor<1x1xf32>) -> tensor<1x1xf32>
- return %0 : tensor<1x1xf32>
+ util.return %0 : tensor<1x1xf32>
}
-// CHECK-LABEL: func.func @gemm_unitM_unitN(
+// CHECK-LABEL: util.func public @gemm_unitM_unitN(
// CHECK: flow.dispatch.workgroups(
// CHECK: linalg.matmul
// -----
-func.func @gemm_unitM(%arg0 : tensor<1x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @gemm_unitM(%arg0 : tensor<1x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<1x?xf32>) -> tensor<1x?xf32> {
%0 = linalg.matmul
ins(%arg0, %arg1 : tensor<1x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<1x?xf32>) -> tensor<1x?xf32>
- return %0 : tensor<1x?xf32>
+ util.return %0 : tensor<1x?xf32>
}
-// CHECK-LABEL: func.func @gemm_unitM(
+// CHECK-LABEL: util.func public @gemm_unitM(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x?xf32>,
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>,
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<1x?xf32>)
@@ -1176,7 +1176,7 @@
// -----
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>
-func.func @unit_dim_generic(%arg0 : tensor<1x?x1x1x?x?x1x?xf32>,
+util.func public @unit_dim_generic(%arg0 : tensor<1x?x1x1x?x?x1x?xf32>,
%arg1 : tensor<1x?x1x1x?x?x1x?xf32>) -> tensor<1x?x1x1x?x?x1x?xf32> {
%0 = linalg.generic {
indexing_maps = [#map, #map, #map],
@@ -1187,9 +1187,9 @@
%1 = arith.addf %arg2, %arg3 : f32
linalg.yield %1 : f32
} -> tensor<1x?x1x1x?x?x1x?xf32>
- return %0 : tensor<1x?x1x1x?x?x1x?xf32>
+ util.return %0 : tensor<1x?x1x1x?x?x1x?xf32>
}
-// CHECK: func.func @unit_dim_generic(
+// CHECK: util.func public @unit_dim_generic(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<1x?x1x1x?x?x1x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<1x?x1x1x?x?x1x?xf32>)
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -1209,7 +1209,7 @@
// -----
-func.func @dont_fuse_tensor_insert_dest_producer(%arg0 : tensor<2x2xf32>) -> tensor<3x3xf32> {
+util.func public @dont_fuse_tensor_insert_dest_producer(%arg0 : tensor<2x2xf32>) -> tensor<3x3xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant dense<0.0> : tensor<3x3xf32>
@@ -1224,9 +1224,9 @@
} -> tensor<2x2xf32>
%1 = tensor.insert_slice %0 into %cst[0, 0] [2, 2] [1, 1]
: tensor<2x2xf32> into tensor<3x3xf32>
- return %1 : tensor<3x3xf32>
+ util.return %1 : tensor<3x3xf32>
}
-// CHECK: func.func @dont_fuse_tensor_insert_dest_producer
+// CHECK: util.func public @dont_fuse_tensor_insert_dest_producer
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x2xf32>
// CHECK: %[[CST:.+]] = arith.constant {{.+}} : tensor<3x3xf32>
// CHECK: %[[DISPATCH1:.+]] = flow.dispatch.workgroups
@@ -1234,26 +1234,26 @@
// CHECK: flow.return
// CHECK: %[[DISPATCH2:.+]] = flow.dispatch.workgroups
// CHECK-SAME: (%[[DISPATCH1]], %[[CST]])
-// CHECK: return %[[DISPATCH2]]
+// CHECK: util.return %[[DISPATCH2]]
// -----
-func.func @fill_op_alone(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
+util.func public @fill_op_alone(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
%cst = arith.constant 42.0 : f32
%0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func.func @fill_op_alone(
+// CHECK: util.func public @fill_op_alone(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// CHECK: %[[SPLAT:.+]] = flow.tensor.splat %[[CST]] : tensor<?x?xf32>{%arg0, %arg1}
-// CHECK: return %[[SPLAT]]
+// CHECK: util.return %[[SPLAT]]
// -----
// Reshapes cannot be fused until #8637 is fixed.
-func.func @dont_fuse_reshape(%lhs : tensor<?xf32>, %rhs1 : tensor<4x?xf32>, %rhs2 : tensor<4x?xf32>)
+util.func public @dont_fuse_reshape(%lhs : tensor<?xf32>, %rhs1 : tensor<4x?xf32>, %rhs2 : tensor<4x?xf32>)
-> (tensor<?x?xf32>, tensor<?x?xf32>)
{
%cst = arith.constant 0.0 : f32
@@ -1273,9 +1273,9 @@
%2= linalg.matmul
ins(%0, %rhs2 : tensor<?x4xf32>, tensor<4x?xf32>)
outs(%fill2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1, %2 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %1, %2 : tensor<?x?xf32>, tensor<?x?xf32>
}
-// CHECK: func.func @dont_fuse_reshape(
+// CHECK: util.func public @dont_fuse_reshape(
// CHECK-SAME: %[[LHS:.+]]: tensor<?xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = flow.tensor.reshape %[[LHS]]
// CHECK: %[[DISPATCH1:.+]] = flow.dispatch.workgroups
@@ -1290,39 +1290,39 @@
// CHECK: linalg.fill
// CHECK: linalg.matmul
// CHECK: flow.return
-// CHECK: return %[[DISPATCH1]], %[[DISPATCH2]]
+// CHECK: util.return %[[DISPATCH1]], %[[DISPATCH2]]
// -----
// TODO: Maybe this test is now not needed anymore.
-func.func @concat_pattern(%src1 : tensor<2x40xf32>, %src2 : tensor<3x40xf32>,
+util.func public @concat_pattern(%src1 : tensor<2x40xf32>, %src2 : tensor<3x40xf32>,
%dest : tensor<5x40xf32>) -> tensor<5x40xf32> {
%0 = tensor.insert_slice %src1 into %dest[0, 0] [2, 40] [1, 1]
: tensor<2x40xf32> into tensor<5x40xf32>
%1 = tensor.insert_slice %src2 into %0[2, 0] [3, 40] [1, 1]
: tensor<3x40xf32> into tensor<5x40xf32>
- return %1 : tensor<5x40xf32>
+ util.return %1 : tensor<5x40xf32>
}
-// CHECK: func.func @concat_pattern
+// CHECK: util.func public @concat_pattern
// CHECK-SAME: %[[SRC1:.+]]: tensor<2x40xf32>
// CHECK-SAME: %[[SRC2:.+]]: tensor<3x40xf32>
// CHECK-SAME: %[[DEST:.+]]: tensor<5x40xf32>
// CHECK: %[[UPDATE1:.+]] = flow.tensor.update %[[SRC1]], %[[DEST]]
// CHECK: %[[UPDATE2:.+]] = flow.tensor.update %[[SRC2]], %[[UPDATE1]]
-// CHECK: return %[[UPDATE2]]
+// CHECK: util.return %[[UPDATE2]]
// -----
-func.func @generic_tensor_insert(%arg0 : tensor<?x?xf32>,
+util.func public @generic_tensor_insert(%arg0 : tensor<?x?xf32>,
%arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
%arg5 : index, %arg6 : index, %arg7 : index, %arg8 : index,
%arg9 : index, %arg10 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = tensor.extract_slice %arg0[%arg1, %arg2] [1, %arg3] [%arg4, %arg5] : tensor<?x?xf32> to tensor<?xf32>
%1 = tensor.insert_slice %0 into %arg10[%arg6, %arg7] [%arg3, 1] [%arg8, %arg9] : tensor<?xf32> into tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func.func @generic_tensor_insert(
+// CHECK: util.func public @generic_tensor_insert(
// CHECK-SAME: %[[SOURCE:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[SOURCE_OFFSET_Y:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[SOURCE_OFFSET_X:[a-zA-Z0-9]+]]: index
@@ -1390,7 +1390,7 @@
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1)>
-func.func @multi_use_producer_fusion(%arg0 : tensor<?x8xf32>, %arg1 : tensor<8x?xf32>,
+util.func public @multi_use_producer_fusion(%arg0 : tensor<?x8xf32>, %arg1 : tensor<8x?xf32>,
%arg2 : tensor<?xf32>) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1409,9 +1409,9 @@
%0 = arith.addf %b0, %b1 : f32
linalg.yield %0 : f32
} -> tensor<?x?xf32>
- return %matmul, %generic : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %matmul, %generic : tensor<?x?xf32>, tensor<?x?xf32>
}
-// CHECK: func @multi_use_producer_fusion
+// CHECK: util.func public @multi_use_producer_fusion
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x8xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<8x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?xf32>
@@ -1447,20 +1447,20 @@
// CHECK-SAME: outs(%[[INIT]] :
// CHECK-DAG: flow.dispatch.tensor.store %[[GENERIC]], %[[RESULT0]]
// CHECK-DAG: flow.dispatch.tensor.store %[[MATMUL]], %[[RESULT1]]
-// CHECK: return %[[DISPATCH]]#1, %[[DISPATCH]]#0
+// CHECK: util.return %[[DISPATCH]]#1, %[[DISPATCH]]#0
// -----
-func.func @fft_cst_output(%arg0 : tensor<3x2190x1x512xf32>) -> (tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>) {
+util.func public @fft_cst_output(%arg0 : tensor<3x2190x1x512xf32>) -> (tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>) {
%c1 = arith.constant 1 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
%cst_0 = arith.constant dense<-0.000000e+00> : tensor<1xf32>
%cst_1 = arith.constant dense<0.000000e+00> : tensor<3x2190x1x512xf32>
%0:2 = iree_linalg_ext.fft ins(%c1, %cst, %cst_0 : index, tensor<1xf32>, tensor<1xf32>)
outs(%arg0, %cst_1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>) : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
- return %0#0, %0#1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
+ util.return %0#0, %0#1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
}
-// CHECK: func @fft_cst_output
+// CHECK: util.func public @fft_cst_output
// CHECK-SAME: %[[ARG0:.+]]: tensor<3x2190x1x512xf32>
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.workgroups
// CHECK-SAME: (%[[ARG0]]) : (tensor<3x2190x1x512xf32>) -> (%[[ARG0]], tensor<3x2190x1x512xf32>)
@@ -1475,7 +1475,7 @@
// -----
-func.func @fuse_conv2d_elementwise(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
+util.func public @fuse_conv2d_elementwise(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x112x112x32xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
@@ -1496,7 +1496,7 @@
%sub = arith.subf %a, %b : f32
linalg.yield %sub : f32
} -> tensor<1x112x112x32xf32>
- return %3 : tensor<1x112x112x32xf32>
+ util.return %3 : tensor<1x112x112x32xf32>
}
// Check that
@@ -1504,7 +1504,7 @@
// * linalg.generic's linalg.fill is pulled into the same group;
// * linalg.conv's linalg.fill is pulled into the same group.
-// CHECK-LABEL: func.func @fuse_conv2d_elementwise
+// CHECK-LABEL: util.func public @fuse_conv2d_elementwise
// CHECK: flow.dispatch.workgroups
// CHECK: %[[INIT:.+]] = tensor.empty
@@ -1518,7 +1518,7 @@
// -----
-func.func @fuse_conv2d_with_multiple_uses(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>)
+util.func public @fuse_conv2d_with_multiple_uses(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>)
-> (tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>) {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x112x112x32xf32>
@@ -1540,10 +1540,10 @@
%sub = arith.subf %a, %b : f32
linalg.yield %sub : f32
} -> tensor<1x112x112x32xf32>
- return %3, %2 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>
+ util.return %3, %2 : tensor<1x112x112x32xf32>, tensor<1x112x112x32xf32>
}
-// CHECK-LABEL: func.func @fuse_conv2d_with_multiple_uses
+// CHECK-LABEL: util.func public @fuse_conv2d_with_multiple_uses
// CHECK: %[[DISPATCH:.+]]:2 = flow.dispatch.workgroups
// CHECK-NEXT: %[[OUT1:[a-zA-Z0-9]+]]: !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
// CHECK-SAME: %[[OUT2:.+]]: !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
@@ -1551,11 +1551,11 @@
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-DAG: flow.dispatch.tensor.store %[[GENERIC]], %[[OUT1]]
// CHECK-DAG: flow.dispatch.tensor.store %[[CONV]], %[[OUT2]]
-// CHECK: return %[[DISPATCH]]#0, %[[DISPATCH]]#1
+// CHECK: util.return %[[DISPATCH]]#0, %[[DISPATCH]]#1
// -----
-func.func @dont_fuse_conv2d_with_non_identity_map(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
+util.func public @dont_fuse_conv2d_with_non_identity_map(%input: tensor<1x225x225x16xf32>, %filter: tensor<3x3x16x32xf32>, %offset: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x112x112x32xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
@@ -1576,10 +1576,10 @@
%sub = arith.subf %a, %b : f32
linalg.yield %sub : f32
} -> tensor<1x112x112x32xf32>
- return %3 : tensor<1x112x112x32xf32>
+ util.return %3 : tensor<1x112x112x32xf32>
}
-// CHECK-LABEL: func.func @dont_fuse_conv2d_with_non_identity_map
+// CHECK-LABEL: util.func public @dont_fuse_conv2d_with_non_identity_map
// CHECK: flow.dispatch.workgroups
// CHECK: linalg.conv_2d_nhwc_hwcf
@@ -1592,7 +1592,7 @@
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @reduction_broadcast_elementwise_unary(%a: tensor<12x16x16xf32>, %b: tensor<12x16x16xf32>) -> tensor<12x16x16xf32> {
+util.func public @reduction_broadcast_elementwise_unary(%a: tensor<12x16x16xf32>, %b: tensor<12x16x16xf32>) -> tensor<12x16x16xf32> {
%cst_47 = arith.constant 0.000000e+00 : f32
%37 = tensor.empty() : tensor<12x16xf32>
%38 = linalg.fill ins(%cst_47 : f32) outs(%37 : tensor<12x16xf32>) -> tensor<12x16xf32>
@@ -1607,14 +1607,14 @@
%780 = arith.subf %arg3, %arg4 : f32
linalg.yield %780 : f32
} -> tensor<12x16x16xf32>
- return %42 : tensor<12x16x16xf32>
+ util.return %42 : tensor<12x16x16xf32>
}
// There is only one input to the reduction.
// Check that two generic ops are dispatched together.
// The first generic (reduction) is directly used by the second generic (elementwise).
-// CHECK-LABEL: func.func @reduction_broadcast_elementwise_unary
+// CHECK-LABEL: util.func public @reduction_broadcast_elementwise_unary
// CHECK: flow.dispatch.workgroups
// CHECK: %[[RED:.+]] = linalg.generic
// CHECK: linalg.generic
@@ -1625,7 +1625,7 @@
#map1 = affine_map<(d0, d1) -> (d0)>
#map2 = affine_map<(d0, d1) -> (d0, d1)>
-func.func @reduction_broadcast_elementwise_binary1(%a1: tensor<128x384xf32>, %a2: tensor<128xf32>, %b: tensor<128x384xf32>) -> tensor<128x384xf32> {
+util.func public @reduction_broadcast_elementwise_binary1(%a1: tensor<128x384xf32>, %a2: tensor<128xf32>, %b: tensor<128x384xf32>) -> tensor<128x384xf32> {
%cst_47 = arith.constant 0.000000e+00 : f32
%37 = tensor.empty() : tensor<128xf32>
%38 = linalg.fill ins(%cst_47 : f32) outs(%37 : tensor<128xf32>) -> tensor<128xf32>
@@ -1642,14 +1642,14 @@
%780 = arith.subf %arg3, %arg4 : f32
linalg.yield %780 : f32
} -> tensor<128x384xf32>
- return %42 : tensor<128x384xf32>
+ util.return %42 : tensor<128x384xf32>
}
// There are two inputs to the reduction and one of them is broadcasted.
// Check that two generic ops are dispatched together.
// The first generic (reduction) is directly used by the second generic (elementwise).
-// CHECK-LABEL: func.func @reduction_broadcast_elementwise_binary1
+// CHECK-LABEL: util.func public @reduction_broadcast_elementwise_binary1
// CHECK: flow.dispatch.workgroups
// CHECK: %[[RED:.+]] = linalg.generic
// CHECK: linalg.generic
@@ -1661,7 +1661,7 @@
#map2 = affine_map<(d0, d1) -> (d0, d1)>
#map3 = affine_map<(d0, d1) -> (d1)>
-func.func @reduction_broadcast_elementwise_binary2(%a1: tensor<128x384xf32>, %a2: tensor<384xf32>, %b: tensor<128x384xf32>) -> tensor<128x384xf32> {
+util.func public @reduction_broadcast_elementwise_binary2(%a1: tensor<128x384xf32>, %a2: tensor<384xf32>, %b: tensor<128x384xf32>) -> tensor<128x384xf32> {
%cst_47 = arith.constant 0.000000e+00 : f32
%37 = tensor.empty() : tensor<128xf32>
%38 = linalg.fill ins(%cst_47 : f32) outs(%37 : tensor<128xf32>) -> tensor<128xf32>
@@ -1678,14 +1678,14 @@
%780 = arith.subf %arg3, %arg4 : f32
linalg.yield %780 : f32
} -> tensor<128x384xf32>
- return %42 : tensor<128x384xf32>
+ util.return %42 : tensor<128x384xf32>
}
// There are two inputs to the reduction and one of them is broadcasted.
// Check that two generic ops are dispatched together.
// The first generic (reduction) is directly used by the second generic (elementwise).
-// CHECK-LABEL: func.func @reduction_broadcast_elementwise_binary2
+// CHECK-LABEL: util.func public @reduction_broadcast_elementwise_binary2
// CHECK: flow.dispatch.workgroups
// CHECK: %[[RED:.+]] = linalg.generic
// CHECK: linalg.generic
@@ -1696,7 +1696,7 @@
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @reduction_broadcast_elementwise_dynamic(%a: tensor<12x16x?xf32>, %b: tensor<12x16x?xf32>) -> tensor<12x16x?xf32> {
+util.func public @reduction_broadcast_elementwise_dynamic(%a: tensor<12x16x?xf32>, %b: tensor<12x16x?xf32>) -> tensor<12x16x?xf32> {
%cst_47 = arith.constant 0.000000e+00 : f32
%37 = tensor.empty() : tensor<12x16xf32>
%38 = linalg.fill ins(%cst_47 : f32) outs(%37 : tensor<12x16xf32>) -> tensor<12x16xf32>
@@ -1713,12 +1713,12 @@
%780 = arith.subf %arg3, %arg4 : f32
linalg.yield %780 : f32
} -> tensor<12x16x?xf32>
- return %42 : tensor<12x16x?xf32>
+ util.return %42 : tensor<12x16x?xf32>
}
// Dynamic shape case is not supported yet by the Vulkan codegen. See #9802.
-// CHECK-LABEL: func.func @reduction_broadcast_elementwise_dynamic
+// CHECK-LABEL: util.func public @reduction_broadcast_elementwise_dynamic
// CHECK: flow.dispatch.workgroups
// CHECK: linalg.generic
// CHECK: linalg.generic
@@ -1729,7 +1729,7 @@
#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
- func.func @softmax(%arg0: tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
+ util.func public @softmax(%arg0: tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant -3.40282347E+38 : f32
@@ -1755,10 +1755,10 @@
%8 = arith.mulf %arg1, %7 : f32
linalg.yield %8 : f32
} -> tensor<12x128x128xf32>
- return %6 : tensor<12x128x128xf32>
+ util.return %6 : tensor<12x128x128xf32>
}
}
-// CHECK-LABEL: func @softmax(
+// CHECK-LABEL: util.func public @softmax(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<12x128x128xf32>
// CHECK: %[[DISPATCH0:.+]] = flow.dispatch.workgroups
// CHECK-SAME: (%[[ARG0]])
@@ -1783,7 +1783,7 @@
// CHECK-SAME: ins(%[[GENERIC1]]#0, %[[GENERIC1]]#1 :
// CHECK: flow.dispatch.tensor.store %[[GENERIC2]]
// CHECK: flow.return
-// CHECK: return %[[DISPATCH1]]
+// CHECK: util.return %[[DISPATCH1]]
// -----
@@ -1791,7 +1791,7 @@
#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0)>
#map2 = affine_map<(d0) -> (d0)>
module {
- func.func @batchnorm_training(%arg0: tensor<12xf32>, %arg1: tensor<12x12x12x12x12xf32>, %arg2: tensor<12xf32>) -> (tensor<12xf32>, tensor<12xf32>, tensor<12xf32>) {
+ util.func public @batchnorm_training(%arg0: tensor<12xf32>, %arg1: tensor<12x12x12x12x12xf32>, %arg2: tensor<12xf32>) -> (tensor<12xf32>, tensor<12xf32>, tensor<12xf32>) {
%cst = arith.constant 1.420000e+00 : f32
%cst_0 = arith.constant 1.450000e+00 : f32
%cst_1 = arith.constant 1.300000e+00 : f32
@@ -1815,10 +1815,10 @@
%9 = arith.subf %arg3, %8 : f32
linalg.yield %5, %6, %9 : f32, f32, f32
} -> (tensor<12xf32>, tensor<12xf32>, tensor<12xf32>)
- return %3#0, %3#1, %3#2 : tensor<12xf32>, tensor<12xf32>, tensor<12xf32>
+ util.return %3#0, %3#1, %3#2 : tensor<12xf32>, tensor<12xf32>, tensor<12xf32>
}
}
-// CHECK-LABEL: func @batchnorm_training(
+// CHECK-LABEL: util.func public @batchnorm_training(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<12xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<12x12x12x12x12xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<12xf32>
@@ -1839,17 +1839,17 @@
// CHECK-DAG: flow.dispatch.tensor.store %[[GENERIC1]]#1
// CHECK-DAG: flow.dispatch.tensor.store %[[GENERIC1]]#2
// CHECK: flow.return
-// CHECK: return %[[DISPATCH]]#0, %[[DISPATCH]]#1, %[[DISPATCH]]#2
+// CHECK: util.return %[[DISPATCH]]#0, %[[DISPATCH]]#1, %[[DISPATCH]]#2
// -----
-func.func @set_encoding_op(%arg0 : tensor<?x?xf32>)
+util.func public @set_encoding_op(%arg0 : tensor<?x?xf32>)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%0 = iree_linalg_ext.set_encoding %arg0
: tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK: func @set_encoding_op
+// CHECK: util.func public @set_encoding_op
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -1872,17 +1872,17 @@
// CHECK: count(%[[WL0:[a-zA-Z0-9]+]]: index, %[[WL1:[a-zA-Z0-9]+]]: index)
// CHECK: %[[X:[a-zA-Z0-9]+]], %[[Y:[a-zA-Z0-9]+]], %[[Z:.+]] = flow.dispatch.workgroup_count_from_slice %[[WL0]], %[[WL1]]
// CHECK: flow.return %[[X]], %[[Y]], %[[Z]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
-func.func @unset_encoding_op(%arg0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>)
+util.func public @unset_encoding_op(%arg0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>)
-> tensor<?x?xf32> {
%0 = iree_linalg_ext.unset_encoding %arg0
: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK: func @unset_encoding_op
+// CHECK: util.func public @unset_encoding_op
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -1905,12 +1905,12 @@
// CHECK: count(%[[WL0:[a-zA-Z0-9]+]]: index, %[[WL1:[a-zA-Z0-9]+]]: index)
// CHECK: %[[X:[a-zA-Z0-9]+]], %[[Y:[a-zA-Z0-9]+]], %[[Z:.+]] = flow.dispatch.workgroup_count_from_slice %[[WL0]], %[[WL1]]
// CHECK: flow.return %[[X]], %[[Y]], %[[Z]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
#map = affine_map<()[s0] -> (-s0 + (s0 ceildiv 16) * 16)>
-func.func @pad_and_set_encoding_op(%arg0 : tensor<?x?xf32>)
+util.func public @pad_and_set_encoding_op(%arg0 : tensor<?x?xf32>)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1925,11 +1925,11 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%encoding = iree_linalg_ext.set_encoding %pad
: tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %encoding : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %encoding : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> ((s0 ceildiv 16) * 16)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (-s0 + (s0 ceildiv 16) * 16)>
-// CHECK: func.func @pad_and_set_encoding
+// CHECK: util.func public @pad_and_set_encoding
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -1962,20 +1962,20 @@
// CHECK-SAME: %[[WL2:[a-zA-Z0-9]+]]: index, %[[WL3:[a-zA-Z0-9]+]]: index)
// CHECK: %[[X:[a-zA-Z0-9]+]], %[[Y:[a-zA-Z0-9]+]], %[[Z:.+]] = flow.dispatch.workgroup_count_from_slice %[[WL0]], %[[WL1]], %[[WL2]], %[[WL3]]
// CHECK: flow.return %[[X]], %[[Y]], %[[Z]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
-func.func @unset_encoding_and_slice(
+util.func public @unset_encoding_and_slice(
%arg0: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
%0 = iree_linalg_ext.unset_encoding %arg0
: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> -> tensor<?x?xf32>
%1 = tensor.extract_slice %0[0, 0] [%arg1, %arg2] [1, 1]
: tensor<?x?xf32> to tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func @unset_encoding_and_slice
+// CHECK: util.func public @unset_encoding_and_slice
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
@@ -2009,7 +2009,7 @@
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module {
- func.func @root_on_unset_encoding(
+ util.func public @root_on_unset_encoding(
%arg0: tensor<784x96xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1: tensor<96xf32>) -> tensor<784x96xf32> {
%0 = iree_linalg_ext.unset_encoding %arg0 : tensor<784x96xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> -> tensor<784x96xf32>
@@ -2026,12 +2026,12 @@
%5 = arith.addf %in, %in_0 : f32
linalg.yield %5 : f32
} -> tensor<784x96xf32>
- return %4 : tensor<784x96xf32>
+ util.return %4 : tensor<784x96xf32>
}
}
// CHECL: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
-// CHECK: func @root_on_unset_encoding
+// CHECK: util.func public @root_on_unset_encoding
// CHECK-SAME: %[[ARG0:.+]]: tensor<784x96xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:.+]]: tensor<96xf32>
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.workgroups(%[[ARG0]], %[[ARG1]])
@@ -2055,7 +2055,7 @@
// -----
-func.func @gemm_encoded(
+util.func public @gemm_encoded(
%arg0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>,
%arg2 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>)
@@ -2066,9 +2066,9 @@
tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>)
outs(%arg2 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
- return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
+ util.return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
}
-// CHECK: func.func @gemm_encoded
+// CHECK: util.func public @gemm_encoded
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
@@ -2086,7 +2086,7 @@
// -----
-func.func @gemm_fill_encoded(
+util.func public @gemm_fill_encoded(
%arg0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>> {
@@ -2104,9 +2104,9 @@
tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>)
outs(%fill : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
- return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
+ util.return %0 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
}
-// CHECK: func.func @gemm_fill_encoded
+// CHECK: util.func public @gemm_fill_encoded
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32]>>
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.workgroups
@@ -2125,13 +2125,13 @@
// -----
-func.func @extract_slice1(%arg0 : tensor<5x24x48xf32>) -> tensor<4xf32> {
+util.func public @extract_slice1(%arg0 : tensor<5x24x48xf32>) -> tensor<4xf32> {
%0 = tensor.extract_slice %arg0[2, 3, 4] [1, 1, 4] [1, 1, 1]
: tensor<5x24x48xf32> to tensor<4xf32>
- return %0 : tensor<4xf32>
+ util.return %0 : tensor<4xf32>
}
-// CHECK-LABEL: func.func @extract_slice1(
+// CHECK-LABEL: util.func public @extract_slice1(
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x24x48xf32>)
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
@@ -2139,11 +2139,11 @@
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK: %[[SLICE:.+]] = flow.tensor.slice %[[ARG0]][%[[C2]], %[[C3]], %[[C4]] for %[[C1]], %[[C1]], %[[C4]]]
// CHECK: %[[RESULT:.+]] = flow.tensor.reshape %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @clone_fill_ops(%arg0 : tensor<128x256xf32>, %arg1 : tensor<256x512xf32>,
+util.func public @clone_fill_ops(%arg0 : tensor<128x256xf32>, %arg1 : tensor<256x512xf32>,
%arg2 : tensor<128x256xf32>, %arg3 : tensor<256x512xf32>)
-> (tensor<128x512xf32>, tensor<128x512xf32>) {
%0 = tensor.empty() : tensor<128x512xf32>
@@ -2153,9 +2153,9 @@
outs(%1 : tensor<128x512xf32>) -> tensor<128x512xf32>
%3 = linalg.matmul ins(%arg2, %arg3 : tensor<128x256xf32>, tensor<256x512xf32>)
outs(%1 : tensor<128x512xf32>) -> tensor<128x512xf32>
- return %2, %3 : tensor<128x512xf32>, tensor<128x512xf32>
+ util.return %2, %3 : tensor<128x512xf32>, tensor<128x512xf32>
}
-// CHECK-LABEL: func @clone_fill_ops(
+// CHECK-LABEL: util.func public @clone_fill_ops(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x256xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<256x512xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9].+]]: tensor<128x256xf32>
@@ -2173,7 +2173,7 @@
// -----
-func.func @softmax(%source : tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
+util.func public @softmax(%source : tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant -3.40282347E+38 : f32
@@ -2203,9 +2203,9 @@
%10 = arith.mulf %in, %9 : f32
linalg.yield %10 : f32
} -> tensor<12x128x128xf32>
- return %8 : tensor<12x128x128xf32>
+ util.return %8 : tensor<12x128x128xf32>
}
-// CHECK-LABEL: func @softmax(
+// CHECK-LABEL: util.func public @softmax(
// CHECK-SAME: %[[INPUT:.+]]: tensor<12x128x128xf32>)
// CHECK: %[[DISPATCH0:.+]] = flow.dispatch.workgroups
// CHECK-SAME: (%[[INPUT]])
@@ -2240,4 +2240,4 @@
// CHECK-SAME: ins(%[[INPUT]], %[[GENERIC2]] :
// CHECK-SAME: outs(%[[EMPTY2]] :
// CHECK: flow.dispatch.tensor.store %[[GENERIC3]], %[[ARG3]]
-// CHECK: return %[[DISPATCH1]]
+// CHECK: util.return %[[DISPATCH1]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_default.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_default.mlir
index 9237de9..03d8b16 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_default.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_default.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions, iree-flow-clone-producers-into-dispatch-regions, iree-flow-form-dispatch-workgroups), cse, canonicalize, cse)" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions, iree-flow-clone-producers-into-dispatch-regions, iree-flow-form-dispatch-workgroups), cse, canonicalize, cse)" %s | FileCheck %s
-func.func @no_fuse_quantized(%arg0 : tensor<?x113x113x64xi8>, %arg1 : tensor<3x3x64xi8>,
+util.func public @no_fuse_quantized(%arg0 : tensor<?x113x113x64xi8>, %arg1 : tensor<3x3x64xi8>,
%arg2 : i32, %arg3 : i32) -> tensor<?x56x56x64xi8> {
%c0 = arith.constant 0 : index
%c0_i32 = arith.constant 0 : i32
@@ -19,9 +19,9 @@
%5 = arith.trunci %b0 : i32 to i8
linalg.yield %5 : i8
} -> tensor<?x56x56x64xi8>
- return %4 : tensor<?x56x56x64xi8>
+ util.return %4 : tensor<?x56x56x64xi8>
}
-// CHECK: func.func @no_fuse_quantized
+// CHECK: util.func public @no_fuse_quantized
// CHECK: flow.dispatch.workgroups
// CHECK: linalg.depthwise_conv_2d_nhwc_hwc_q
// CHECK-NOT: linalg.generic
@@ -32,7 +32,7 @@
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
-func.func @elem_set_encoding(%arg0: tensor<512xf32>, %arg1: tensor<384x512xf32>,
+util.func public @elem_set_encoding(%arg0: tensor<512xf32>, %arg1: tensor<384x512xf32>,
%arg2: tensor<384x512xf32>) -> tensor<384x512xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%0 = tensor.empty() : tensor<384x512xf32>
%1 = linalg.generic {indexing_maps = [#map, #map1, #map1, #map1],
@@ -45,9 +45,9 @@
linalg.yield %4 : f32
} -> tensor<384x512xf32>
%2 = iree_linalg_ext.set_encoding %1 : tensor<384x512xf32> -> tensor<384x512xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %2 : tensor<384x512xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %2 : tensor<384x512xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK-LABEL: func.func @elem_set_encoding
+// CHECK-LABEL: util.func public @elem_set_encoding
// CHECK: flow.dispatch.workgroups
// CHECK: linalg.generic
// CHECK: iree_linalg_ext.set_encoding
@@ -55,7 +55,7 @@
// -----
-func.func @fix_dominance_on_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @fix_dominance_on_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -75,9 +75,9 @@
%addf = arith.addf %b0, %b1 : f32
linalg.yield %addf : f32
} -> tensor<?x?xf32>
- return %bias_add : tensor<?x?xf32>
+ util.return %bias_add : tensor<?x?xf32>
}
-// CHECK-LABEL: func @fix_dominance_on_fusion
+// CHECK-LABEL: util.func public @fix_dominance_on_fusion
// CHECK: %[[RESULT:.+]] = flow.dispatch.workgroups
// CHECK: %[[EMPTY:.+]] = tensor.empty
// CHECK: %[[FILL:.+]] = linalg.fill
@@ -87,4 +87,4 @@
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[GEMM]],
// CHECK: flow.dispatch.tensor.store %[[GENERIC]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion_with_transpose.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion_with_transpose.mlir
index cc7cc7c..b8e7d1d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion_with_transpose.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors_fusion_with_transpose.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(func.func(iree-flow-interchange-transpose-generic-ops,iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-form-dispatch-workgroups, canonicalize, cse))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --verify-diagnostics --pass-pipeline="builtin.module(util.func(iree-flow-interchange-transpose-generic-ops,iree-flow-form-dispatch-regions{fuse-multi-use=true}, iree-flow-form-dispatch-workgroups, canonicalize, cse))" %s | FileCheck %s
-func.func @fuse_batch_matmul_transpose(%a: tensor<4x384x384xf32>, %b: tensor<4x384x32xf32>) -> tensor<384x4x32xf32> {
+util.func public @fuse_batch_matmul_transpose(%a: tensor<4x384x384xf32>, %b: tensor<4x384x32xf32>) -> tensor<384x4x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<4x384x32xf32>
%c = linalg.fill ins(%cst : f32) outs(%init : tensor<4x384x32xf32>) -> tensor<4x384x32xf32>
@@ -10,7 +10,7 @@
^bb0(%arg0: f32, %arg1: f32):
linalg.yield %arg0 : f32
} -> tensor<384x4x32xf32>
- return %transpose : tensor<384x4x32xf32>
+ util.return %transpose : tensor<384x4x32xf32>
}
// Check that
@@ -19,7 +19,7 @@
// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
-// CHECK-LABEL: func.func @fuse_batch_matmul_transpose
+// CHECK-LABEL: util.func public @fuse_batch_matmul_transpose
// CHECK: flow.dispatch.workgroups
// CHECK: %[[MATMUL:.+]] = linalg.batch_matmul
// CHECK: linalg.generic
@@ -29,7 +29,7 @@
// -----
-func.func @fuse_matmul_transpose(%a: tensor<128x384xf32>, %b: tensor<384x384xf32>) -> tensor<384x128xf32> {
+util.func public @fuse_matmul_transpose(%a: tensor<128x384xf32>, %b: tensor<384x384xf32>) -> tensor<384x128xf32> {
%cst = arith.constant 0.000000e+00 : f32
%cst1 = arith.constant 1.000000e+00 : f32
%init = tensor.empty() : tensor<128x384xf32>
@@ -41,7 +41,7 @@
%add = arith.addf %arg0, %cst1 : f32
linalg.yield %add : f32
} -> tensor<384x128xf32>
- return %transpose : tensor<384x128xf32>
+ util.return %transpose : tensor<384x128xf32>
}
// Check that
@@ -50,7 +50,7 @@
// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d1, d0)>
-// CHECK-LABEL: func.func @fuse_matmul_transpose
+// CHECK-LABEL: util.func public @fuse_matmul_transpose
// CHECK: flow.dispatch.workgroups
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK: linalg.generic
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_transform_dialect.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_transform_dialect.mlir
index f4e61aa..cc0f8ce 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_transform_dialect.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_transform_dialect.mlir
@@ -1,45 +1,45 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-dispatch-with-transform-dialect{transform-file-name=%p/transform_dialect_dispatch_spec.mlir}))" %s | \
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-dispatch-with-transform-dialect{transform-file-name=%p/transform_dialect_dispatch_spec.mlir}))" %s | \
// RUN: FileCheck %s
-func.func @tile_matmul_alone(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @tile_matmul_alone(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%1 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func.func @tile_matmul_alone
+// CHECK: util.func public @tile_matmul_alone
// CHECK: flow.dispatch.workgroups
-func.func @tile_matmul_with_constant(
+util.func public @tile_matmul_with_constant(
%arg1 : tensor<5x10xf32>, %arg2 : tensor<10x10xf32>) -> tensor<10x10xf32> {
// The constant is cloned and fused into the dispatch region.
%a = arith.constant dense<1.0> : tensor<10x5xf32>
%1 = linalg.matmul ins(%a, %arg1 : tensor<10x5xf32>, tensor<5x10xf32>)
outs(%arg2 : tensor<10x10xf32>) -> tensor<10x10xf32>
- return %1 : tensor<10x10xf32>
+ util.return %1 : tensor<10x10xf32>
}
-// CHECK: func.func @tile_matmul_with_constant
+// CHECK: util.func public @tile_matmul_with_constant
// CHECK: flow.dispatch.workgroups
// CHECK: arith.constant dense<1.000000e+00> : tensor<10x5xf32>
// Some dummy functions to exercise TSAN under parallelism.
-func.func @foo1() -> index {
+util.func public @foo1() -> index {
%0 = arith.constant 1 : index
- return %0 : index
+ util.return %0 : index
}
-func.func @foo2() -> index {
+util.func public @foo2() -> index {
%0 = arith.constant 2 : index
- return %0 : index
+ util.return %0 : index
}
-func.func @foo3() -> index {
+util.func public @foo3() -> index {
%0 = arith.constant 3 : index
- return %0 : index
+ util.return %0 : index
}
-func.func @foo4() -> index {
+util.func public @foo4() -> index {
%0 = arith.constant 4 : index
- return %0 : index
+ util.return %0 : index
}
-func.func @foo5() -> index {
+util.func public @foo5() -> index {
%0 = arith.constant 5 : index
- return %0 : index
+ util.return %0 : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
index 959dea7..dfb93d5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
@@ -3,29 +3,29 @@
// Basic usage from the `--iree-native-bindings-support` flag.
// CHECK-LABEL: func private @simpleMul
-func.func @simpleMul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.module.export} {
+util.func public @simpleMul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.module.export} {
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32>
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32>
%2 = arith.mulf %0, %1 : tensor<4xf32>
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view
- return %3 : !hal.buffer_view
+ util.return %3 : !hal.buffer_view
}
// CHECK: util.global private @[[GLOBAL_ARG0:.+]] {inlining_policy = #util.inline.never} : !hal.buffer_view
// CHECK: util.global private @[[GLOBAL_ARG1:.+]] {inlining_policy = #util.inline.never} : !hal.buffer_view
-// CHECK: func.func @simpleMul_benchmark() attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "entry"}} {
+// CHECK: util.func public @simpleMul_benchmark() attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "entry"}} {
// CHECK-DAG: %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
// CHECK-DAG: %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : !hal.buffer_view
-// CHECK-NEXT: %[[RET0:.+]] = call @simpleMul(%[[ARG0]], %[[ARG1]])
+// CHECK-NEXT: %[[RET0:.+]] = util.call @simpleMul(%[[ARG0]], %[[ARG1]])
// CHECK: util.optimization_barrier %[[RET0]] : !hal.buffer_view
-// CHECK: return
+// CHECK: util.return
// -----
// Ensures that functions with multiple blocks are handled correctly.
-func.func @while(%start: i32, %bound: i32) -> i32 {
+util.func public @while(%start: i32, %bound: i32) -> i32 {
cf.br ^bb1(%start : i32)
^bb1(%0: i32):
%1 = arith.cmpi slt, %0, %bound : i32
@@ -34,29 +34,29 @@
%4 = arith.addi %3, %3 : i32
cf.br ^bb1(%4 : i32)
^bb3(%5: i32):
- return %5 : i32
+ util.return %5 : i32
}
// CHECK: util.global private @[[GLOBAL_ARG0:.+]] {inlining_policy = #util.inline.never} = 0 : i32
// CHECK: util.global private @[[GLOBAL_ARG1:.+]] {inlining_policy = #util.inline.never} = 0 : i32
-// CHECK: func.func @while_benchmark()
+// CHECK: util.func public @while_benchmark()
// CHECK-DAG: %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : i32
// CHECK-DAG: %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : i32
-// CHECK: %[[RET0:.+]] = call @while(%[[ARG0]], %[[ARG1]])
+// CHECK: %[[RET0:.+]] = util.call @while(%[[ARG0]], %[[ARG1]])
// CHECK: util.optimization_barrier %[[RET0]] : i32
-// CHECK: return
+// CHECK: util.return
// -----
// Ensure the tensors we allocate are of the desired type after casting.
// CHECK-LABEL: func private @importBufferViewBitcasting
-func.func @importBufferViewBitcasting(%view: !hal.buffer_view) -> !hal.buffer_view {
+util.func public @importBufferViewBitcasting(%view: !hal.buffer_view) -> !hal.buffer_view {
%0 = hal.tensor.import %view : !hal.buffer_view -> tensor<2xui32> as tensor<4xi32>
%1 = arith.muli %0, %0 : tensor<4xi32>
%2 = hal.tensor.export %1 : tensor<4xi32> -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// CHECK: util.global private @[[GLOBAL_ARG0:.+]] {inlining_policy = #util.inline.never} : !hal.buffer_view
@@ -66,11 +66,11 @@
// CHECK-DAG: %[[DNO:.+]] = util.optimization_barrier %[[EXPORT]]
// CHECK-NEXT: util.global.store %[[DNO]], @[[GLOBAL_ARG0]]
-// CHECK: func.func @importBufferViewBitcasting_benchmark()
+// CHECK: util.func public @importBufferViewBitcasting_benchmark()
// CHECK-DAG: %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
-// CHECK-NEXT: %[[RET0:.+]] = call @importBufferViewBitcasting(%[[ARG0]])
+// CHECK-NEXT: %[[RET0:.+]] = util.call @importBufferViewBitcasting(%[[ARG0]])
// CHECK: util.optimization_barrier %[[RET0]] : !hal.buffer_view
-// CHECK: return
+// CHECK: util.return
// -----
@@ -78,13 +78,13 @@
// that'll likely cause confusion ((dispatches 0x0x0 work) "whoa so fast!" :).
// expected-error @+1 {{unsupported buffer view import}}
-func.func @importDynamicBufferView(%view: !hal.buffer_view) -> !hal.buffer_view {
+util.func public @importDynamicBufferView(%view: !hal.buffer_view) -> !hal.buffer_view {
%dim0 = hal.buffer_view.dim<%view : !hal.buffer_view>[0] : index
%dim1 = hal.buffer_view.dim<%view : !hal.buffer_view>[1] : index
%0 = hal.tensor.import %view : !hal.buffer_view -> tensor<?x?x4xf32>{%dim0, %dim1}
%1 = arith.mulf %0, %0 : tensor<?x?x4xf32>
%2 = hal.tensor.export %1 : tensor<?x?x4xf32>{%dim0, %dim1} -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// -----
@@ -92,11 +92,11 @@
// We should look for export ops to find the storage size (must be static).
// CHECK-LABEL: func private @exportBufferViewInPlace
-func.func @exportBufferViewInPlace(%view: !hal.buffer_view, %storage: !hal.buffer) -> !hal.buffer_view {
+util.func public @exportBufferViewInPlace(%view: !hal.buffer_view, %storage: !hal.buffer) -> !hal.buffer_view {
%0 = hal.tensor.import %view : !hal.buffer_view -> tensor<4xi32>
%1 = arith.muli %0, %0 : tensor<4xi32>
%2 = hal.tensor.export %1 into(%storage : !hal.buffer) : tensor<4xi32> -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// CHECK: util.global private @[[GLOBAL_ARG0:.+]] {inlining_policy = #util.inline.never} : !hal.buffer_view
@@ -113,9 +113,9 @@
// CHECK-DAG: %[[DNO1:.+]] = util.optimization_barrier %[[EXPORT1]]
// CHECK-NEXT: util.global.store %[[DNO1]], @[[GLOBAL_ARG1]]
-// CHECK: func.func @exportBufferViewInPlace_benchmark()
+// CHECK: util.func public @exportBufferViewInPlace_benchmark()
// CHECK-DAG: %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
// CHECK-DAG: %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : !hal.buffer
-// CHECK-NEXT: %[[RET0:.+]] = call @exportBufferViewInPlace(%[[ARG0]], %[[ARG1]])
+// CHECK-NEXT: %[[RET0:.+]] = util.call @exportBufferViewInPlace(%[[ARG0]], %[[ARG1]])
// CHECK: util.optimization_barrier %[[RET0]] : !hal.buffer_view
-// CHECK: return
+// CHECK: util.return
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fold_unit_dims.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fold_unit_dims.mlir
index 7878e9b..cc9f684 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fold_unit_dims.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fold_unit_dims.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-fold-unit-extent-dims))" %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-fold-unit-extent-dims))" %s | FileCheck %s
-func.func @no_fold_unit_dims_in_dispatches(%arg0 : tensor<1x1x10xf32>) -> tensor<1x1x10xf32> {
+util.func public @no_fold_unit_dims_in_dispatches(%arg0 : tensor<1x1x10xf32>) -> tensor<1x1x10xf32> {
%0 = tensor.empty() : tensor<1x1x10xf32>
%1 = flow.dispatch.region[] -> (tensor<1x1x10xf32>) {
%2 = linalg.generic {
@@ -13,11 +13,11 @@
} -> tensor<1x1x10xf32>
flow.return %2 : tensor<1x1x10xf32>
}
- return %1 : tensor<1x1x10xf32>
+ util.return %1 : tensor<1x1x10xf32>
}
-// CHECK: func @no_fold_unit_dims_in_dispatches(%[[ARG0:.+]]: tensor<1x1x10xf32>)
+// CHECK: util.func public @no_fold_unit_dims_in_dispatches(%[[ARG0:.+]]: tensor<1x1x10xf32>)
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.region
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]] : tensor<1x1x10xf32>)
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
index 537149e..26ddb6d 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_regions.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions))" --split-input-file %s | FileCheck %s
-func.func @pack_elementwise_fusion(%arg0 : tensor<?xf32>,
+util.func public @pack_elementwise_fusion(%arg0 : tensor<?xf32>,
%arg1 : tensor<?x?xf32>) -> tensor<?x?x8x32xf32> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
@@ -27,9 +27,9 @@
%9 = tensor.pack %5 padding_value(%cst : f32)
inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %8 : tensor<?x?xf32> -> tensor<?x?x8x32xf32>
- return %9 : tensor<?x?x8x32xf32>
+ util.return %9 : tensor<?x?x8x32xf32>
}
-// CHECK-LABEL: func @pack_elementwise_fusion(
+// CHECK-LABEL: util.func public @pack_elementwise_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
@@ -38,11 +38,11 @@
// CHECK-SAME: ins(%[[ARG1]], %[[ARG0]] :
// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]]
// CHECK: flow.return %[[PACK]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
-func.func @pack_fusion(%arg0 : tensor<?x?xf32>,
+util.func public @pack_fusion(%arg0 : tensor<?x?xf32>,
%arg1 : tensor<?x?xf32>) -> tensor<?x?x8x32xf32> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
@@ -80,9 +80,9 @@
%9 = tensor.pack %5 padding_value(%cst : f32)
inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %8 : tensor<?x?xf32> -> tensor<?x?x8x32xf32>
- return %9 : tensor<?x?x8x32xf32>
+ util.return %9 : tensor<?x?x8x32xf32>
}
-// CHECK-LABEL: func @pack_fusion(
+// CHECK-LABEL: util.func public @pack_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
@@ -94,7 +94,7 @@
// CHECK-SAME: ins(%[[ARG1]], %[[REDUCTION]] :
// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]]
// CHECK: flow.return %[[PACK]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
@@ -103,7 +103,7 @@
#map2 = affine_map<()[s0] -> (s0 ceildiv 8)>
#map3 = affine_map<()[s0] -> (s0 ceildiv 32)>
module {
- func.func @tranpose_pack_fusion(%arg0: tensor<?x?xf32>) -> tensor<?x?x8x32xf32> {
+ util.func public @tranpose_pack_fusion(%arg0: tensor<?x?xf32>) -> tensor<?x?x8x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -118,12 +118,12 @@
%3 = affine.apply #map3()[%dim_0]
%4 = tensor.empty(%2, %3) : tensor<?x?x8x32xf32>
%pack = tensor.pack %1 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %4 : tensor<?x?xf32> -> tensor<?x?x8x32xf32>
- return %pack : tensor<?x?x8x32xf32>
+ util.return %pack : tensor<?x?x8x32xf32>
}
}
// No fusion as the CPU backend currently can't handle fusion with transpose
// between ops.
-// CHECK-LABEL: func @tranpose_pack_fusion(
+// CHECK-LABEL: util.func public @tranpose_pack_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK: %[[DISPATCH1:.+]] = flow.dispatch.region
// CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -132,11 +132,11 @@
// CHECK: %[[DISPATCH2:.+]] = flow.dispatch.region
// CHECK: %[[PACK:.+]] = tensor.pack %[[DISPATCH1]]
// CHECK: flow.return %[[PACK]]
-// CHECK: return %[[DISPATCH2]]
+// CHECK: util.return %[[DISPATCH2]]
// -----
-func.func @set_encoding_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @set_encoding_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index) -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
@@ -168,9 +168,9 @@
} -> tensor<?x?xf32>
%6 = iree_linalg_ext.set_encoding %5
: tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %6 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %6 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK-LABEL: func @set_encoding_fusion(
+// CHECK-LABEL: util.func public @set_encoding_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
@@ -182,11 +182,11 @@
// CHECK-SAME: ins(%[[ARG1]], %[[REDUCTION]] :
// CHECK: %[[PACK:.+]] = iree_linalg_ext.set_encoding %[[GENERIC]]
// CHECK: flow.return %[[PACK]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
-func.func @set_encoding_pad_fusion(%arg0 : tensor<?x?xf32>,
+util.func public @set_encoding_pad_fusion(%arg0 : tensor<?x?xf32>,
%arg1 : index, %arg2 : index) -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%cst = arith.constant 0.0 : f32
%0 = tensor.pad %arg0 low[0, 0] high[%arg1, %arg2] {
@@ -195,19 +195,19 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%1 = iree_linalg_ext.set_encoding %0
: tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %1 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %1 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK-LABEL: func @set_encoding_pad_fusion(
+// CHECK-LABEL: util.func public @set_encoding_pad_fusion(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
// CHECK: %[[PAD:.+]] = tensor.pad %[[ARG0]]
// CHECK: %[[ENCODING:.+]] = iree_linalg_ext.set_encoding %[[PAD]]
// CHECK: flow.return %[[ENCODING]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
-func.func @set_encoding_pad_elementwise_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @set_encoding_pad_elementwise_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : index, %arg3 : index) -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
@@ -243,9 +243,9 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%7 = iree_linalg_ext.set_encoding %6
: tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %7 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %7 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK-LABEL: func @set_encoding_pad_elementwise_fusion(
+// CHECK-LABEL: util.func public @set_encoding_pad_elementwise_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
@@ -258,11 +258,11 @@
// CHECK: %[[PAD:.+]] = tensor.pad %[[GENERIC]]
// CHECK: %[[PACK:.+]] = iree_linalg_ext.set_encoding %[[PAD]]
// CHECK: flow.return %[[PACK]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
-func.func @unset_encoding_elementwise_fusion(
+util.func public @unset_encoding_elementwise_fusion(
%arg0: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1: tensor<?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
@@ -283,9 +283,9 @@
%5 = arith.addf %b0, %b1 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
- return %4 : tensor<?x?xf32>
+ util.return %4 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @unset_encoding_elementwise_fusion(
+// CHECK-LABEL: util.func public @unset_encoding_elementwise_fusion(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xf32>)
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
@@ -293,11 +293,11 @@
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[UNSET_ENCODING]], %[[ARG1]]
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @unset_encoding_slice_elementwise_fusion(
+util.func public @unset_encoding_slice_elementwise_fusion(
%arg0: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>,
%arg1: tensor<?xf32>, %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
@@ -319,9 +319,9 @@
%6 = arith.addf %b0, %b1 : f32
linalg.yield %6 : f32
} -> tensor<?x?xf32>
- return %5 : tensor<?x?xf32>
+ util.return %5 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @unset_encoding_slice_elementwise_fusion(
+// CHECK-LABEL: util.func public @unset_encoding_slice_elementwise_fusion(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xf32>
// CHECK: %[[RESULT0:.+]] = flow.dispatch.region
@@ -329,11 +329,11 @@
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNSET_ENCODING]]
// CHECK: %[[GENERIC:.+]] = linalg.generic {{.*}} ins(%[[SLICE]]
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[RESULT0]]
+// CHECK: util.return %[[RESULT0]]
// -----
-func.func @unpack_encoding_elementwise_fusion(
+util.func public @unpack_encoding_elementwise_fusion(
%arg0: tensor<?x?x?x?xf32>,
%arg1: tensor<?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
@@ -360,9 +360,9 @@
%2 = arith.addf %b0, %b1 : f32
linalg.yield %2 : f32
} -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @unpack_encoding_elementwise_fusion(
+// CHECK-LABEL: util.func public @unpack_encoding_elementwise_fusion(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xf32>)
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
@@ -370,11 +370,11 @@
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]]
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @unpack_non_intersecting_reduction(
+util.func public @unpack_non_intersecting_reduction(
%arg0: tensor<?x?x?xf32>,
%arg1: tensor<?xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
@@ -400,9 +400,9 @@
%3 = arith.addf %2, %b2 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
- return %1 : tensor<?xf32>
+ util.return %1 : tensor<?xf32>
}
-// CHECK-LABEL: func @unpack_non_intersecting_reduction(
+// CHECK-LABEL: util.func public @unpack_non_intersecting_reduction(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xf32>)
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
@@ -410,11 +410,11 @@
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]]
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @data_dependent_shape(%arg0 : tensor<f32>, %arg1 : tensor<2xi32>)
+util.func public @data_dependent_shape(%arg0 : tensor<f32>, %arg1 : tensor<2xi32>)
-> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -430,9 +430,9 @@
^bb0(%b0: f32, %b1 : f32):
linalg.yield %b0 : f32
} -> tensor<?x?xf32>
- return %generic : tensor<?x?xf32>
+ util.return %generic : tensor<?x?xf32>
}
-// CHECK: func @data_dependent_shape(
+// CHECK: util.func public @data_dependent_shape(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<2xi32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -452,7 +452,7 @@
// -----
-func.func @no_yield_dead_results(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : tensor<?xf32>) -> tensor<?xf32> {
+util.func public @no_yield_dead_results(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : tensor<?xf32>) -> tensor<?xf32> {
%0:2 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
@@ -462,17 +462,17 @@
%2 = arith.addf %b0, %b2 : f32
linalg.yield %1, %2 : f32, f32
} -> (tensor<?xf32>, tensor<?xf32>)
- return %0#1 : tensor<?xf32>
+ util.return %0#1 : tensor<?xf32>
}
-// CHECK: func @no_yield_dead_results
+// CHECK: util.func public @no_yield_dead_results
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK: flow.return %[[GENERIC]]#1
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @scf_nested_dispatch(%arg0 : tensor<?xi32>) -> (tensor<?xi32>) {
+util.func public @scf_nested_dispatch(%arg0 : tensor<?xi32>) -> (tensor<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%dim = tensor.dim %arg0, %c0 : tensor<?xi32>
@@ -489,7 +489,7 @@
scf.yield %arg0 : tensor<?xi32>
}
- return %scf : tensor<?xi32>
+ util.return %scf : tensor<?xi32>
}
// CHECK-LABEL: @scf_nested_dispatch
@@ -501,7 +501,7 @@
// -----
-func.func @no_dequantization_fusion(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
+util.func public @no_dequantization_fusion(%arg0: tensor<4096x32x128xi8>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x1x4096xf32>
%1 = tensor.empty() : tensor<4096x32x128xf32>
@@ -531,9 +531,9 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<1x1x4096xf32>
- return %4 : tensor<1x1x4096xf32>
+ util.return %4 : tensor<1x1x4096xf32>
}
-// CHECK: func.func @no_dequantization_fusion
+// CHECK: util.func public @no_dequantization_fusion
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<4096x32x128xi8>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<1x1x32x128xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<4096x32x1xf32>
@@ -553,25 +553,25 @@
// CHECK-SAME: ins(%[[ARG1]], %[[GEN0]] :
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.return %[[GEN1]] :
-// CHECK: return %[[DISP]]
+// CHECK: util.return %[[DISP]]
// -----
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
module {
- func.func @no_dequantization_like_fusion(%arg0: tensor<32x1x16x1x8xi16>, %arg1: tensor<32x344x16x32x8xi4>) -> tensor<32x1x344x1x32xi32> {
+ util.func public @no_dequantization_like_fusion(%arg0: tensor<32x1x16x1x8xi16>, %arg1: tensor<32x344x16x32x8xi4>) -> tensor<32x1x344x1x32xi32> {
%c0_i32 = arith.constant 0 : i32
%0 = tensor.empty() : tensor<32x1x16x1x8xi32>
- %1 = linalg.generic {indexing_maps = [#map, #map],
- iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
+ %1 = linalg.generic {indexing_maps = [#map, #map],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<32x1x16x1x8xi16>) outs(%0 : tensor<32x1x16x1x8xi32>) {
^bb0(%in: i16, %out: i32):
%7 = arith.extsi %in : i16 to i32
linalg.yield %7 : i32
} -> tensor<32x1x16x1x8xi32>
%2 = tensor.empty() : tensor<32x344x16x32x8xi32>
- %3 = linalg.generic {indexing_maps = [#map, #map],
- iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
+ %3 = linalg.generic {indexing_maps = [#map, #map],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]}
ins(%arg1 : tensor<32x344x16x32x8xi4>) outs(%2 : tensor<32x344x16x32x8xi32>) {
^bb0(%in: i4, %out: i32):
%7 = arith.extui %in : i4 to i32
@@ -580,10 +580,10 @@
%4 = tensor.empty() : tensor<32x1x344x1x32xi32>
%5 = linalg.fill ins(%c0_i32 : i32) outs(%4 : tensor<32x1x344x1x32xi32>) -> tensor<32x1x344x1x32xi32>
%7 = linalg.batch_mmt4d ins(%1, %3 : tensor<32x1x16x1x8xi32>, tensor<32x344x16x32x8xi32>) outs(%5 : tensor<32x1x344x1x32xi32>) -> tensor<32x1x344x1x32xi32>
- return %7 : tensor<32x1x344x1x32xi32>
+ util.return %7 : tensor<32x1x344x1x32xi32>
}
}
-// CHECK: func.func @no_dequantization_like_fusion
+// CHECK: util.func public @no_dequantization_like_fusion
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<32x1x16x1x8xi16>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<32x344x16x32x8xi4>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : i32
@@ -605,4 +605,4 @@
// CHECK-SAME: ins(%[[GEN0]], %[[GEN1]] :
// CHECK-SAME: outs(%[[FILL]] :
// CHECK: flow.return %[[MMT4D]] :
-// CHECK: return %[[DISP]]
+// CHECK: util.return %[[DISP]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
index adabc66..98b67e5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_dispatch_workgroups.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-workgroups))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-workgroups))" --split-input-file %s | FileCheck %s
-func.func @existing_count_region(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
+util.func public @existing_count_region(%arg0 : index, %arg1 : index) -> tensor<?x?xf32> {
%c1 = arith.constant 1 : index
%0 = flow.dispatch.region[%arg0, %arg1] -> (tensor<?x?xf32>{%arg0, %arg1}) {
%1 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
@@ -8,16 +8,16 @@
} count(%arg2 : index, %arg3 : index) -> (index, index, index) {
flow.return %arg2, %arg3, %c1 : index, index, index
}
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @existing_count_region(
+// CHECK-LABEL: util.func public @existing_count_region(
// CHECK: count(%[[ARG2:[a-zA-Z0-9]+]]: index, %[[ARG3:[a-zA-Z0-9]+]]: index)
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: flow.return %[[ARG2]], %[[ARG3]], %[[C1]]
// -----
-func.func @simple_test_with_cfg(%arg0: i1) -> (tensor<10x20xf32>) {
+util.func public @simple_test_with_cfg(%arg0: i1) -> (tensor<10x20xf32>) {
%cst = arith.constant dense<1.000000e+00> : tensor<10x20xf32>
%0 = flow.dispatch.region -> (tensor<10x20xf32>) {
%cst_0 = arith.constant dense<1.000000e+00> : tensor<10x20xf32>
@@ -28,9 +28,9 @@
^bb2: // pred: ^bb0
flow.return %cst_0 : tensor<10x20xf32>
}
- return %0 : tensor<10x20xf32>
+ util.return %0 : tensor<10x20xf32>
}
-// CHECK-LABEL: func @simple_test_with_cfg
+// CHECK-LABEL: util.func public @simple_test_with_cfg
// CHECK-SAME: %[[ARG0:.+]]: i1
// CHECK: %[[RESULT:.+]] = flow.dispatch.workgroups(%[[ARG0]])
// CHECK-NEXT: %[[ARG1:.+]]: i1, %[[ARG2:.+]]: !flow.dispatch.tensor
@@ -42,4 +42,4 @@
// CHECK: ^[[BB2:.+]]:
// CHECK: flow.dispatch.tensor.store %[[CST]], %[[ARG2]]
// CHECK: flow.return
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_scalar_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_scalar_dispatches.mlir
index b918a79..c54d8f7 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_scalar_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/form_scalar_dispatches.mlir
@@ -1,7 +1,7 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-form-scalar-dispatches))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-form-scalar-dispatches))" --split-input-file %s | FileCheck %s
#map = affine_map<() -> ()>
-func.func @simpleDAG(
+util.func public @simpleDAG(
%arg0 : tensor<f32>, %arg1 : tensor<f32>, %arg2 : tensor<f32>, %arg3 : tensor<f32>)
-> (tensor<f32>, tensor<f32>) {
%0 = tensor.empty() : tensor<f32>
@@ -23,9 +23,9 @@
%6 = arith.subf %b1, %b0 : f32
linalg.yield %6 : f32
} -> tensor<f32>
- return %1, %5 : tensor<f32>, tensor<f32>
+ util.return %1, %5 : tensor<f32>, tensor<f32>
}
-// CHECK-LABEL: func @simpleDAG(
+// CHECK-LABEL: util.func public @simpleDAG(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<f32>
@@ -41,12 +41,12 @@
// CHECK: count() -> (index, index, index)
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index
// CHECK-NEXT: flow.return %[[C1]], %[[C1]], %[[C1]]
-// CHECK: return %[[RESULT]]#1, %[[RESULT]]#0
+// CHECK: util.return %[[RESULT]]#1, %[[RESULT]]#0
// -----
#map = affine_map<() -> ()>
-func.func @simpleHorizontal(
+util.func public @simpleHorizontal(
%arg0 : tensor<f32>, %arg1 : tensor<f32>, %arg2 : tensor<f32>, %arg3 : tensor<f32>)
-> (tensor<f32>, tensor<f32>) {
%0 = tensor.empty() : tensor<f32>
@@ -68,9 +68,9 @@
%6 = arith.addf %b0, %b0 : f32
linalg.yield %6 : f32
} -> tensor<f32>
- return %3, %5 : tensor<f32>, tensor<f32>
+ util.return %3, %5 : tensor<f32>, tensor<f32>
}
-// CHECK-LABEL: func @simpleHorizontal
+// CHECK-LABEL: util.func public @simpleHorizontal
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<f32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<f32>
@@ -86,7 +86,7 @@
// CHECK: count() -> (index, index, index)
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index
// CHECK-NEXT: flow.return %[[C1]], %[[C1]], %[[C1]]
-// CHECK: return %[[RESULT]]#1, %[[RESULT]]#0
+// CHECK: util.return %[[RESULT]]#1, %[[RESULT]]#0
// -----
@@ -94,7 +94,7 @@
#map1 = affine_map<(d0, d1) -> (d0, d1)>
#map2 = affine_map<(d0, d1) -> (d0)>
#map3 = affine_map<(d0) -> (d0)>
-func.func @interleaving(
+util.func public @interleaving(
%arg0 : tensor<1x1xf32>, %arg1 : tensor<1xf32>, %arg2 : tensor<f32>, %arg3 : tensor<f32>)
-> (tensor<f32>, tensor<1xf32>) {
%cst = arith.constant 0.0 : f32
@@ -128,9 +128,9 @@
%10 = arith.divf %b1, %b0 : f32
linalg.yield %10 : f32
} -> tensor<f32>
- return %9, %7 : tensor<f32>, tensor<1xf32>
+ util.return %9, %7 : tensor<f32>, tensor<1xf32>
}
-// CHECK-LABEL: func @interleaving(
+// CHECK-LABEL: util.func public @interleaving(
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x1xf32>,
// CHECK-SAME: %[[ARG1:.+]]: tensor<1xf32>,
// CHECK-SAME: %[[ARG2:.+]]: tensor<f32>,
@@ -156,4 +156,4 @@
// CHECK-SAME: ins(%[[DISPATCH0]]#0, %[[ARG3]] :
// CHECK-SAME: outs(%[[EMPTY1]] :
// CHECK: flow.return %[[GENERIC3]], %[[GENERIC2]]
-// CHECK: return %[[DISPATCH1]]#0, %[[DISPATCH1]]#1
+// CHECK: util.return %[[DISPATCH1]]#0, %[[DISPATCH1]]#1
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fusion_of_tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fusion_of_tensor_ops.mlir
index 848f241..f3741b6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fusion_of_tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/fusion_of_tensor_ops.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-flow-fusion-of-tensor-ops{fuse-multi-use=true}))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-flow-fusion-of-tensor-ops{fuse-multi-use=true}))" %s | FileCheck %s
-func.func @softmax(%arg0 : tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
+util.func public @softmax(%arg0 : tensor<12x128x128xf32>) -> tensor<12x128x128xf32> {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant -3.40282347E+38 : f32
@@ -38,9 +38,9 @@
%11 = arith.mulf %b0, %b1 : f32
linalg.yield %11 : f32
} -> tensor<12x128x128xf32>
- return %10 : tensor<12x128x128xf32>
+ util.return %10 : tensor<12x128x128xf32>
}
-// CHECK-LABEL: func.func @softmax
+// CHECK-LABEL: util.func public @softmax
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<12x128x128xf32>
// CHECK: %[[INIT0:.+]] = tensor.empty()
// CHECK: %[[FILL0:.+]] = linalg.fill
@@ -63,11 +63,11 @@
// CHECK: %[[GENERIC3:.+]] = linalg.generic
// CHECK-SAME: ins(%[[GENERIC1]], %[[GENERIC2]] :
// CHECK-SAME: outs(%[[INIT1]] :
-// CHECK: return %[[GENERIC3]]
+// CHECK: util.return %[[GENERIC3]]
// -----
-func.func @batchnorm_training(%10 : tensor<12xf32>, %11 : tensor<12x12x12x12x12xf32>, %12 : tensor<12xf32>) -> (tensor<12xf32>, tensor<12xf32>, tensor<12xf32>)
+util.func public @batchnorm_training(%10 : tensor<12xf32>, %11 : tensor<12x12x12x12x12xf32>, %12 : tensor<12xf32>) -> (tensor<12xf32>, tensor<12xf32>, tensor<12xf32>)
{
%cst = arith.constant 1.42 : f32
%cst_1 = arith.constant 1.45 : f32
@@ -111,9 +111,9 @@
%21 = arith.subf %arg1, %20 : f32
linalg.yield %21 : f32
} -> tensor<12xf32>
- return %16, %17, %18 : tensor<12xf32>, tensor<12xf32>, tensor<12xf32>
+ util.return %16, %17, %18 : tensor<12xf32>, tensor<12xf32>, tensor<12xf32>
}
-// CHECK-LABEL: func @batchnorm_training(
+// CHECK-LABEL: util.func public @batchnorm_training(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<12xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<12x12x12x12x12xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<12xf32>
@@ -126,13 +126,13 @@
// CHECK: %[[GENERIC1:.+]]:3 = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[GENERIC0]] :
// CHECK-SAME: outs(%[[INIT]], %[[INIT]], %[[INIT]] :
-// CHECK: return %[[GENERIC1]]#0, %[[GENERIC1]]#1, %[[GENERIC1]]#2
+// CHECK: util.return %[[GENERIC1]]#0, %[[GENERIC1]]#1, %[[GENERIC1]]#2
// -----
#map = affine_map<(d0, d1) -> (d0, d1)>
module {
- func.func @fuse_only_with_same_marker(%arg0: tensor<5x5xf32>, %arg1: tensor<5x5xf32>) -> (tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>) {
+ util.func public @fuse_only_with_same_marker(%arg0: tensor<5x5xf32>, %arg1: tensor<5x5xf32>) -> (tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>) {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 2.000000e+00 : f32
%cst_1 = arith.constant 3.000000e+00 : f32
@@ -160,10 +160,10 @@
%8 = arith.subf %arg2, %arg3 : f32
linalg.yield %8 : f32
} -> tensor<5x5xf32>
- return %4, %5, %6, %7 : tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>
+ util.return %4, %5, %6, %7 : tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>, tensor<5x5xf32>
}
}
-// CHECK-LABEL: func.func @fuse_only_with_same_marke
+// CHECK-LABEL: util.func public @fuse_only_with_same_marke
// CHECK: linalg.generic
// CHECK-NOT: linalg.generic
@@ -175,7 +175,7 @@
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d3, d4, d5)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2)>
module {
- func.func @fuse_only_projected_perm(%arg0: tensor<16x1082x1922xi8>, %arg1: tensor<32x16x3x3xf32>, %arg2: tensor<32x1080x1920xi32>) -> tensor<32x1080x1920xi32> {
+ util.func public @fuse_only_projected_perm(%arg0: tensor<16x1082x1922xi8>, %arg1: tensor<32x16x3x3xf32>, %arg2: tensor<32x1080x1920xi32>) -> tensor<32x1080x1920xi32> {
%0 = tensor.empty() : tensor<32x16x3x3xi8>
%eltwise = linalg.generic {
indexing_maps = [#map0, #map0],
@@ -200,10 +200,10 @@
linalg.yield %235 : i32
} -> tensor<32x1080x1920xi32>
- return %conv : tensor<32x1080x1920xi32>
+ util.return %conv : tensor<32x1080x1920xi32>
}
}
-// CHECK-LABEL: func.func @fuse_only_projected_perm
+// CHECK-LABEL: util.func public @fuse_only_projected_perm
// CHECK: linalg.generic
// CHECK: linalg.generic
@@ -214,7 +214,7 @@
#map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3, d0)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
module {
- func.func @nofuse_broadcast_compute(%arg0: tensor<702x702x128xf32>, %arg1: tensor<702x702x128xf32>,
+ util.func public @nofuse_broadcast_compute(%arg0: tensor<702x702x128xf32>, %arg1: tensor<702x702x128xf32>,
%arg2: tensor<702x702x128xf32>, %arg3: tensor<702x702x128xf32>) -> tensor<128x702x702xf32> {
%cst = arith.constant dense<1.000000e+00> : tensor<702x702x128xf32>
%cst_0 = arith.constant 0.000000e+00 : f32
@@ -252,10 +252,10 @@
%10 = arith.addf %out, %9 : f32
linalg.yield %10 : f32
} -> tensor<128x702x702xf32>
- return %8 : tensor<128x702x702xf32>
+ util.return %8 : tensor<128x702x702xf32>
}
}
-// CHECK-LABEL: func @nofuse_broadcast_compute(
+// CHECK-LABEL: util.func public @nofuse_broadcast_compute(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<702x702x128xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<702x702x128xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<702x702x128xf32>
@@ -273,11 +273,11 @@
// CHECK: %[[GENERIC2:.+]] = linalg.generic
// CHECK-SAME: ins(%[[GENERIC1]], %[[GENERIC0]] :
// CHECK-SAME: outs(%[[FILL]] :
-// CHECK: return %[[GENERIC2]]
+// CHECK: util.return %[[GENERIC2]]
// -----
-func.func @fuse_iota_ops(%arg0: tensor<10x20xi32>) -> (tensor<10x20xi32>, tensor<10x20xi32>) {
+util.func public @fuse_iota_ops(%arg0: tensor<10x20xi32>) -> (tensor<10x20xi32>, tensor<10x20xi32>) {
%c20 = arith.constant 20 : index
%0 = tensor.empty() : tensor<10x20xi32>
%1 = tensor.empty() : tensor<10x20xindex>
@@ -310,9 +310,9 @@
%9 = arith.muli %8, %b0 : i32
linalg.yield %9 : i32
} -> tensor<10x20xi32>
- return %7, %8 : tensor<10x20xi32>, tensor<10x20xi32>
+ util.return %7, %8 : tensor<10x20xi32>, tensor<10x20xi32>
}
-// CHECK-LABEL: func @fuse_iota_ops(
+// CHECK-LABEL: util.func public @fuse_iota_ops(
// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xi32>)
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<10x20xi32>
// CHECK: %[[GENERIC1:.+]] = linalg.generic
@@ -329,11 +329,11 @@
// CHECK: linalg.index
// CHECK: arith.muli
// CHECK: linalg.yield
-// CHECK: return %[[GENERIC1]], %[[GENERIC2]]
+// CHECK: util.return %[[GENERIC1]], %[[GENERIC2]]
// -----
-func.func @no_fuse_within_dispatch(%arg0 : tensor<10x20xf32>) -> tensor<10x20xf32> {
+util.func public @no_fuse_within_dispatch(%arg0 : tensor<10x20xf32>) -> tensor<10x20xf32> {
%0 = flow.dispatch.region[] -> (tensor<10x20xf32>) {
%1 = tensor.empty() : tensor<10x20xf32>
%2 = linalg.generic {
@@ -355,18 +355,18 @@
} -> tensor<10x20xf32>
flow.return %3 : tensor<10x20xf32>
}
- return %0 : tensor<10x20xf32>
+ util.return %0 : tensor<10x20xf32>
}
-// CHECK-LABEL: func @no_fuse_within_dispatch
+// CHECK-LABEL: util.func public @no_fuse_within_dispatch
// CHECK: %[[RETURN:.+]] = flow.dispatch.region
// CHECK: linalg.generic
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK: flow.return %[[GENERIC]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
// -----
-func.func @nofuse_by_expand_dequant(%arg0 : tensor<11008x4096xi4>, %arg1 : tensor<11008x32x1xf16>, %arg2 : tensor<11008x32x1xf16>) -> (tensor<11008xf16>) {
+util.func public @nofuse_by_expand_dequant(%arg0 : tensor<11008x4096xi4>, %arg1 : tensor<11008x32x1xf16>, %arg2 : tensor<11008x32x1xf16>) -> (tensor<11008xf16>) {
%cst_1 = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<11008x32x128xf16>
%1 = arith.constant dense<0.000000e+00> : tensor<1x1x32x128xf16>
@@ -390,9 +390,9 @@
%13 = arith.addf %12, %out : f16
linalg.yield %13 : f16
} -> tensor<11008xf16>
- return %5 : tensor<11008xf16>
+ util.return %5 : tensor<11008xf16>
}
-// CHECK-LABEL: func.func @nofuse_by_expand_dequant
+// CHECK-LABEL: util.func public @nofuse_by_expand_dequant
// CHECK-COUNT-2: tensor.collapse_shape
// CHECK: %[[DEQUANT:.+]] = linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
@@ -410,7 +410,7 @@
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)>
#map5 = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>
#map6 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
-func.func @nofuse_by_collapse_matmul(%arg0: tensor<1x1xi64>, %arg1: tensor<4096x32x128xi4>, %arg2: tensor<4096x32x1xf16>, %arg3: tensor<4096x32x1xf16>) -> tensor<1x1x4096xf16> {
+util.func public @nofuse_by_collapse_matmul(%arg0: tensor<1x1xi64>, %arg1: tensor<4096x32x128xi4>, %arg2: tensor<4096x32x1xf16>, %arg3: tensor<4096x32x1xf16>) -> tensor<1x1x4096xf16> {
%cst = arith.constant 0.000000e+00 : f16
%c32000 = arith.constant 32000 : index
%c0_i64 = arith.constant 0 : i64
@@ -456,14 +456,14 @@
%12 = arith.addf %11, %out : f16
linalg.yield %12 : f16
} -> tensor<1x1x4096xf16>
- return %10 : tensor<1x1x4096xf16>
+ util.return %10 : tensor<1x1x4096xf16>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>
// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
-// CHECK: func.func @nofuse_by_collapse_matmul
+// CHECK: util.func public @nofuse_by_collapse_matmul
// CHECK: %[[DEQUANT:.+]] = linalg.generic {indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP1]], #[[MAP]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
// CHECK-NOT: tensor.collapse_shape %[[DEQUANT]]
@@ -473,7 +473,7 @@
// CHECK-NOT: tensor.expand_shape %[[MATVEC]]
// -----
-func.func @math_sin() {
+util.func public @math_sin() {
%cst = arith.constant 2.000000e+00 : f32
%cst_0 = arith.constant dense<[0.000000e+00, 6.349640e-01, -6.349640e-01, 6.349640e-01]> : tensor<4xf32>
%cst_1 = arith.constant dense<[0.000000e+00, 1.298460e+00, 1.298460e+00, -1.298460e+00]> : tensor<4xf32>
@@ -507,9 +507,9 @@
} -> tensor<4xf32>
check.expect_almost_eq(%4#1, %cst_1) : tensor<4xf32>
check.expect_almost_eq(%5, %cst_0) : tensor<4xf32>
- return
+ util.return
}
-// CHECK-LABEL: func @math_sin()
+// CHECK-LABEL: util.func public @math_sin()
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-DAG: check.expect_almost_eq(%[[GENERIC]]#0,
// CHECK-DAG: check.expect_almost_eq(%[[GENERIC]]#1,
@@ -517,7 +517,7 @@
// -----
// Check for fix for https://github.com/openxla/iree/issues/14953
-func.func @fix_issue_14953(%arg0: tensor<11008x32x1xf16>, %arg1: tensor<11008x32x1xf16>, %arg2: tensor<1x1x32x128xf16>) -> tensor<1x1x11008xf16> {
+util.func public @fix_issue_14953(%arg0: tensor<11008x32x1xf16>, %arg1: tensor<11008x32x1xf16>, %arg2: tensor<1x1x32x128xf16>) -> tensor<1x1x11008xf16> {
%cst = arith.constant 0.000000e+00 : f16
%cst_0 = arith.constant dense<0> : tensor<11008x32x128xi4>
%3 = util.optimization_barrier %cst_0 : tensor<11008x32x128xi4>
@@ -545,9 +545,9 @@
flow.return %10 : tensor<11008xf16>
}
%expanded = tensor.expand_shape %7 [[0, 1, 2]] : tensor<11008xf16> into tensor<1x1x11008xf16>
- return %expanded : tensor<1x1x11008xf16>
+ util.return %expanded : tensor<1x1x11008xf16>
}
-// CHECK-LABEL: func @fix_issue_14953
+// CHECK-LABEL: util.func public @fix_issue_14953
// CHECK: flow.dispatch.region
// CHECK: %[[GENERIC0:.+]] = linalg.generic
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/initialize_empty_tensors.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/initialize_empty_tensors.mlir
index 1b9a1f5..1050f15 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/initialize_empty_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/initialize_empty_tensors.mlir
@@ -1,31 +1,31 @@
-// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-flow-initialize-empty-tensors{zero-fill=true}))' --split-input-file %s | FileCheck %s --check-prefix=ZERO-CHECK
-// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-flow-initialize-empty-tensors{zero-fill=false}))' --split-input-file %s | FileCheck %s --check-prefix=EMPTY-CHECK
+// RUN: iree-opt --pass-pipeline='builtin.module(util.func(iree-flow-initialize-empty-tensors{zero-fill=true}))' --split-input-file %s | FileCheck %s --check-prefix=ZERO-CHECK
+// RUN: iree-opt --pass-pipeline='builtin.module(util.func(iree-flow-initialize-empty-tensors{zero-fill=false}))' --split-input-file %s | FileCheck %s --check-prefix=EMPTY-CHECK
-func.func @return_zero_init(%arg0 : index, %arg1 : index) -> (tensor<?x?x42xi32>, tensor<?x42x?xf32>) {
+util.func public @return_zero_init(%arg0 : index, %arg1 : index) -> (tensor<?x?x42xi32>, tensor<?x42x?xf32>) {
%0 = tensor.empty(%arg0, %arg1) : tensor<?x?x42xi32>
%1 = tensor.empty(%arg1, %arg0) : tensor<?x42x?xf32>
- return %0, %1 : tensor<?x?x42xi32>, tensor<?x42x?xf32>
+ util.return %0, %1 : tensor<?x?x42xi32>, tensor<?x42x?xf32>
}
-// ZERO-CHECK: func.func @return_zero_init(
+// ZERO-CHECK: util.func public @return_zero_init(
// ZERO-CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index
// ZERO-CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// ZERO-CHECK-DAG: %[[ZERO_INT:.+]] = arith.constant 0 : i32
// ZERO-CHECK-DAG: %[[ZERO_FLOAT:.+]] = arith.constant 0.000000e+00 : f32
// ZERO-CHECK-DAG: %[[SPLAT_INT:.+]] = flow.tensor.splat %[[ZERO_INT]] : tensor<?x?x42xi32>{%[[ARG0]], %[[ARG1]]}
// ZERO-CHECK-DAG: %[[SPLAT_FLOAT:.+]] = flow.tensor.splat %[[ZERO_FLOAT]] : tensor<?x42x?xf32>{%[[ARG1]], %[[ARG0]]}
-// ZERO-CHECK: return %[[SPLAT_INT]], %[[SPLAT_FLOAT]]
+// ZERO-CHECK: util.return %[[SPLAT_INT]], %[[SPLAT_FLOAT]]
-// EMPTY-CHECK: func.func @return_zero_init(
+// EMPTY-CHECK: util.func public @return_zero_init(
// EMPTY-CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: index
// EMPTY-CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
// EMPTY-CHECK-DAG: %[[EMPTY_INT:.+]] = flow.tensor.empty : tensor<?x?x42xi32>{%[[ARG0]], %[[ARG1]]}
// EMPTY-CHECK-DAG: %[[EMPTY_FLOAT:.+]] = flow.tensor.empty : tensor<?x42x?xf32>{%[[ARG1]], %[[ARG0]]}
-// EMPTY-CHECK: return %[[EMPTY_INT]], %[[EMPTY_FLOAT]]
+// EMPTY-CHECK: util.return %[[EMPTY_INT]], %[[EMPTY_FLOAT]]
// -----
-func.func @empty_within_dispatch_workgroup(%arg0: index, %arg1: index) -> tensor<?x?xf32> {
+util.func public @empty_within_dispatch_workgroup(%arg0: index, %arg1: index) -> tensor<?x?xf32> {
%0 = flow.dispatch.workgroups[%arg0, %arg1](%arg0, %arg1, %arg0, %arg1) : (index, index, index, index) -> tensor<?x?xf32>{%arg0, %arg1} =
(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>) {
%1 = tensor.empty(%arg4, %arg5) : tensor<?x?xf32>
@@ -36,14 +36,14 @@
%c1 = arith.constant 1 : index
flow.return %arg2, %arg3, %c1 : index, index, index
}
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// ZERO-CHECK-LABEL: func.func @empty_within_dispatch_workgroup(
+// ZERO-CHECK-LABEL: util.func public @empty_within_dispatch_workgroup(
// ZERO-CHECK: flow.dispatch.workgroup
// ZERO-CHECK: tensor.empty
// ZERO-CHECK: flow.return
-// EMPTY-CHECK-LABEL: func.func @empty_within_dispatch_workgroup(
+// EMPTY-CHECK-LABEL: util.func public @empty_within_dispatch_workgroup(
// EMPTY-CHECK: flow.dispatch.workgroup
// EMPTY-CHECK: tensor.empty
// EMPTY-CHECK: flow.return
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/inject_dispatch_tracing.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/inject_dispatch_tracing.mlir
index c090fd6..870b1bc 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/inject_dispatch_tracing.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/inject_dispatch_tracing.mlir
@@ -1,22 +1,22 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-flow-inject-dispatch-tracing))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-flow-inject-dispatch-tracing))' %s | FileCheck %s
-// CHECK-LABEL: func.func @singleDispatch
+// CHECK-LABEL: util.func public @singleDispatch
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4xf32>)
-func.func @singleDispatch(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+util.func public @singleDispatch(%arg0: tensor<4xf32>) -> tensor<4xf32> {
%c4 = arith.constant 4 : index
// CHECK: flow.tensor.trace "ex::entry0 inputs" = [%[[ARG0]] : tensor<4xf32>]
// CHECK-NEXT: %[[RET0:.+]] = flow.dispatch @ex::@entry0[%c4](%[[ARG0]]) : (tensor<4xf32>) -> tensor<4xf32>
%0 = flow.dispatch @ex::@entry0[%c4](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK-NEXT: flow.tensor.trace "ex::entry0 outputs" = [%[[RET0]] : tensor<4xf32>]
- // CHECK-NEXT: return %[[RET0]]
- return %0 : tensor<4xf32>
+ // CHECK-NEXT: util.return %[[RET0]]
+ util.return %0 : tensor<4xf32>
}
// -----
-// CHECK-LABEL: func.func @multiDispatch
+// CHECK-LABEL: util.func public @multiDispatch
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4xf32>)
-func.func @multiDispatch(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+util.func public @multiDispatch(%arg0: tensor<4xf32>) -> tensor<4xf32> {
%c4 = arith.constant 4 : index
// CHECK: flow.tensor.trace "ex::entry0 inputs" = [%[[ARG0]] : tensor<4xf32>]
@@ -29,6 +29,6 @@
%1 = flow.dispatch @ex::@entry1[%c4](%0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK-NEXT: flow.tensor.trace "ex::entry1 outputs" = [%[[RET1]] : tensor<4xf32>]
- // CHECK: return %[[RET1]]
- return %1 : tensor<4xf32>
+ // CHECK: util.return %[[RET1]]
+ util.return %1 : tensor<4xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_markers.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_targets.mlir
similarity index 80%
rename from compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_markers.mlir
rename to compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_targets.mlir
index c15f268..3ba7dc5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_markers.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/insert_dispatch_debug_targets.mlir
@@ -4,9 +4,9 @@
// Multiple functions.
-// CHECK-LABEL: func.func @target_func
-// ORDINAL_0-LABEL: func.func @target_func
-func.func @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+// CHECK-LABEL: util.func public @target_func
+// ORDINAL_0-LABEL: util.func public @target_func
+util.func public @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// CHECK: %[[D0:.+]] = flow.dispatch @dispatch_0::@dispatch_0_entry
// ORDINAL_0: flow.tensor.trace "dispatch_0::dispatch_0_entry::0 inputs"
@@ -18,12 +18,12 @@
%2 = flow.dispatch @dispatch_2::@dispatch_2_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view
// CHECK: %[[EXPORT:.+]] = hal.tensor.export %[[D1]] : tensor<4xf32> -> !hal.buffer_view
- // CHECK: return %[[EXPORT]] : !hal.buffer_view
- return %3 : !hal.buffer_view
+ // CHECK: util.return %[[EXPORT]] : !hal.buffer_view
+ util.return %3 : !hal.buffer_view
}
-// CHECK-LABEL: func.func @other_func
-func.func @other_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+// CHECK-LABEL: util.func public @other_func
+util.func public @other_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// CHECK: %[[D3:.+]] = flow.dispatch @dispatch_3::@dispatch_3_entry
%0 = flow.dispatch @dispatch_3::@dispatch_3_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
@@ -38,17 +38,17 @@
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view
// Only break on the symbol as the ordinal specifies a different function.
- // SYMBOL: return %[[BREAK_EXPORT]] : !hal.buffer_view
- // ORDINAL: return %[[ORIGINAL_EXPORT]] : !hal.buffer_view
- return %3 : !hal.buffer_view
+ // SYMBOL: util.return %[[BREAK_EXPORT]] : !hal.buffer_view
+ // ORDINAL: util.return %[[ORIGINAL_EXPORT]] : !hal.buffer_view
+ util.return %3 : !hal.buffer_view
}
// -----
// Break on a dispatch with a different number of results.
-// CHECK-LABEL: func.func @target_func
-func.func @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+// CHECK-LABEL: util.func public @target_func
+util.func public @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// CHECK: %[[D0:.+]] = flow.dispatch @dispatch_0::@dispatch_0_entry
%0 = flow.dispatch @dispatch_0::@dispatch_0_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
@@ -58,32 +58,32 @@
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view
// CHECK: %[[EXPORT_0:.+]] = hal.tensor.export %[[D1]]#0 : tensor<4xf32> -> !hal.buffer_view
// CHECK: %[[EXPORT_1:.+]] = hal.tensor.export %[[D1]]#1 : tensor<4xf32> -> !hal.buffer_view
- // CHECK: return %[[EXPORT_0]], %[[EXPORT_1]] : !hal.buffer_view
- return %3 : !hal.buffer_view
+ // CHECK: util.return %[[EXPORT_0]], %[[EXPORT_1]] : !hal.buffer_view
+ util.return %3 : !hal.buffer_view
}
// -----
// Break/trace on a dispatch not found in the target function should do nothing.
-// CHECK-LABEL: func.func @target_func
-func.func @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+// CHECK-LABEL: util.func public @target_func
+util.func public @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// CHECK: %[[D0:.+]] = flow.dispatch @dispatch_0::@dispatch_0_entry
%0 = flow.dispatch @dispatch_0::@dispatch_0_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
// CHECK: %[[D1:.+]] = hal.tensor.export %[[D0]] : tensor<4xf32> -> !hal.buffer_view
%1 = hal.tensor.export %0 : tensor<4xf32> -> !hal.buffer_view
- // CHECK: return %[[D1]] : !hal.buffer_view
- return %1 : !hal.buffer_view
+ // CHECK: util.return %[[D1]] : !hal.buffer_view
+ util.return %1 : !hal.buffer_view
}
// -----
// Combines tracing and breaking on the same dispatch.
-// CHECK-LABEL: func.func @target_func
+// CHECK-LABEL: util.func public @target_func
// CHECK-SAME: %[[ARG0:.+]]: tensor<4xf32>
-func.func @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+util.func public @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// CHECK: %[[D0:.+]] = flow.dispatch @dispatch_0::@dispatch_0_entry
%0 = flow.dispatch @dispatch_0::@dispatch_0_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
@@ -98,8 +98,8 @@
%2 = flow.dispatch @dispatch_2::@dispatch_2_entry[%c4] (%arg0) : (tensor<4xf32>) -> tensor<4xf32>
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view
// CHECK: %[[EXPORT:.+]] = hal.tensor.export %[[D1]] : tensor<4xf32> -> !hal.buffer_view
- // CHECK: return %[[EXPORT]] : !hal.buffer_view
- return %3 : !hal.buffer_view
+ // CHECK: util.return %[[EXPORT]] : !hal.buffer_view
+ util.return %3 : !hal.buffer_view
}
@@ -107,8 +107,8 @@
// Checks regex matching on a dispatch symbol.
-// CHECK-LABEL: func.func @target_func
-func.func @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
+// CHECK-LABEL: util.func public @target_func
+util.func public @target_func(%arg0: tensor<4xf32>) -> !hal.buffer_view {
%c4 = arith.constant 4 : index
// SYMBOL: flow.tensor.trace "dispatch_1::dispatch_1_entry inputs"
@@ -122,5 +122,5 @@
// SYMBOL-NOT: flow.tensor.trace "dispatch_11::dispatch_11_entry outputs"
%2 = hal.tensor.export %1 : tensor<4xf32> -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_generic_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_generic_ops.mlir
index 87d20ec..73f336f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_generic_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_generic_ops.mlir
@@ -3,10 +3,10 @@
// CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1)>
// CHECK: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d2, d0, d1)>
-// CHECK: func.func @interchange
+// CHECK: util.func public @interchange
// CHECK: linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-func.func @interchange(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
+util.func public @interchange(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
%0 = linalg.generic {indexing_maps = [
affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
@@ -19,5 +19,5 @@
%a = arith.addf %arg5, %m : f32
linalg.yield %a : f32
} -> tensor<?x?x?xf32>
- return %0 : tensor<?x?x?xf32>
+ util.return %0 : tensor<?x?x?xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_transpose_generic_ops.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_transpose_generic_ops.mlir
index e88d704..e4a86b8 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_transpose_generic_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/interchange_transpose_generic_ops.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt --split-input-file --verify-diagnostics --iree-flow-interchange-transpose-generic-ops --canonicalize -cse %s | FileCheck %s
-func.func @batch_matmul_transpose(%a: tensor<4x384x384xf32>, %b: tensor<4x384x32xf32>) -> tensor<384x4x32xf32> {
+util.func public @batch_matmul_transpose(%a: tensor<4x384x384xf32>, %b: tensor<4x384x32xf32>) -> tensor<384x4x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<4x384x32xf32>
%c = linalg.fill ins(%cst : f32) outs(%init : tensor<4x384x32xf32>) -> tensor<4x384x32xf32>
@@ -10,14 +10,14 @@
^bb0(%arg0: f32, %arg1: f32):
linalg.yield %arg0 : f32
} -> tensor<384x4x32xf32>
- return %transpose : tensor<384x4x32xf32>
+ util.return %transpose : tensor<384x4x32xf32>
}
// Check that linalg.generic's input and output indexing maps are exchanged.
// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
-// CHECK-LABEL: func.func @batch_matmul_transpose
+// CHECK-LABEL: util.func public @batch_matmul_transpose
// CHECK: %[[MATMUL:.+]] = linalg.batch_matmul
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
@@ -25,7 +25,7 @@
// -----
-func.func @matmul_transpose(%a: tensor<128x384xf32>, %b: tensor<384x384xf32>) -> tensor<384x128xf32> {
+util.func public @matmul_transpose(%a: tensor<128x384xf32>, %b: tensor<384x384xf32>) -> tensor<384x128xf32> {
%cst = arith.constant 0.000000e+00 : f32
%cst1 = arith.constant 1.000000e+00 : f32
%init = tensor.empty() : tensor<128x384xf32>
@@ -37,14 +37,14 @@
%add = arith.addf %arg0, %cst1 : f32
linalg.yield %add : f32
} -> tensor<384x128xf32>
- return %transpose : tensor<384x128xf32>
+ util.return %transpose : tensor<384x128xf32>
}
// Check that linalg.generic's input and output indexing maps are exchanged.
// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d1, d0)>
-// CHECK-LABEL: func.func @matmul_transpose
+// CHECK-LABEL: util.func public @matmul_transpose
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
index 7571eb3..70bb373 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
@@ -21,8 +21,8 @@
// Demonstrates the full functionality of an extern dispatch op.
// Note that some fields are optional.
-// CHECK-LABEL: func.func @dispatchExtern
-func.func @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %arg2: i32) -> tensor<8xi32> {
+// CHECK-LABEL: util.func public @dispatchExtern
+util.func public @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %arg2: i32) -> tensor<8xi32> {
%x = arith.constant 100 : index
%y = arith.constant 50 : index
// Dispatch workgroups to the externally defined function "main" in the
@@ -60,6 +60,6 @@
hal.return %ok : i1
} ordinal(200) = [#hal.executable.object<{path = "b.o"}>]
})
- // CHECK: return %[[RESULT]]
- return %result : tensor<8xi32>
+ // CHECK: util.return %[[RESULT]]
+ util.return %result : tensor<8xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_regions.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_regions.mlir
index 67e90e1..0a0f9e5 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_regions.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_regions.mlir
@@ -11,9 +11,9 @@
// CHECK-NEXT: return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @staticShapeDispatch(
+// CHECK-LABEL: util.func public @staticShapeDispatch(
// CHECK-SAME: %[[ARG0:.+]]: tensor<8x4xf32>)
-func.func @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
+util.func public @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
// CHECK-DAG: %[[X:.+]] = arith.constant 100
%x = arith.constant 100 : index
// CHECK-DAG: %[[Y:.+]] = arith.constant 50
@@ -29,8 +29,8 @@
flow.dispatch.tensor.store %ret_value, %ret, offsets=[0, 0], sizes=[4, 8], strides=[1, 1] : tensor<4x8xf32> -> !flow.dispatch.tensor<writeonly:tensor<4x8xf32>>
flow.return
}
- // CHECK-NEXT: return %[[RET]]
- return %0 : tensor<4x8xf32>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %0 : tensor<4x8xf32>
}
// -----
@@ -43,9 +43,9 @@
// CHECK-NEXT: flow.executable.export public @dispatchFnMuli_dispatch_1
// CHECK: func.func @dispatchFnMuli_dispatch_1(
-// CHECK-LABEL: func.func @dispatchFnMuli(
+// CHECK-LABEL: util.func public @dispatchFnMuli(
// CHECK-SAME: %[[ARG0:.+]]: tensor<8x4xf32>)
-func.func @dispatchFnMuli(%arg0 : tensor<8x4xf32>) -> tensor<8x4xf32> {
+util.func public @dispatchFnMuli(%arg0 : tensor<8x4xf32>) -> tensor<8x4xf32> {
// CHECK-DAG: %[[X:.+]] = arith.constant 100
%x = arith.constant 100 : index
// CHECK-DAG: %[[Y:.+]] = arith.constant 50
@@ -72,16 +72,16 @@
flow.dispatch.tensor.store %ret_value, %ret, offsets=[0, 0], sizes=[8, 4], strides=[1, 1] : tensor<8x4xf32> -> !flow.dispatch.tensor<writeonly:tensor<8x4xf32>>
flow.return
}
- // CHECK-NEXT: return %[[RET1]]
- return %1 : tensor<8x4xf32>
+ // CHECK-NEXT: util.return %[[RET1]]
+ util.return %1 : tensor<8x4xf32>
}
// -----
// CHECK: flow.executable private @dispatchFn1_dispatch_0
-// CHECK-LABEL: func.func @dispatchFn1
-func.func @dispatchFn1(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
+// CHECK-LABEL: util.func public @dispatchFn1
+util.func public @dispatchFn1(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
%x = arith.constant 100 : index
%y = arith.constant 50 : index
// CHECK: flow.dispatch @dispatchFn1_dispatch_0::@dispatchFn1_dispatch_0
@@ -93,13 +93,13 @@
) {
flow.return
}
- return %0 : tensor<4x8xf32>
+ util.return %0 : tensor<4x8xf32>
}
// CHECK: flow.executable private @dispatchFn2_dispatch_0
-// CHECK-LABEL: func.func @dispatchFn2
-func.func @dispatchFn2(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
+// CHECK-LABEL: util.func public @dispatchFn2
+util.func public @dispatchFn2(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
%x = arith.constant 100 : index
%y = arith.constant 50 : index
// CHECK: flow.dispatch @dispatchFn2_dispatch_0::@dispatchFn2_dispatch_0
@@ -111,7 +111,7 @@
) {
flow.return
}
- return %0 : tensor<4x8xf32>
+ util.return %0 : tensor<4x8xf32>
}
// -----
@@ -130,9 +130,9 @@
// CHECK: return
// CHECK-NEXT: }
-// CHECK-LABEL: func.func @dynamicShapeDispatch(
+// CHECK-LABEL: util.func public @dynamicShapeDispatch(
// CHECK-SAME: %[[ARG0:.+]]: tensor<7x?x24x?xf32>
-func.func @dynamicShapeDispatch(%arg0 : tensor<7x?x24x?xf32>) -> tensor<?x?x1024xf32> {
+util.func public @dynamicShapeDispatch(%arg0 : tensor<7x?x24x?xf32>) -> tensor<?x?x1024xf32> {
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
// CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %c1
@@ -157,14 +157,14 @@
flow.dispatch.tensor.store %ret_tile, %ret, offsets=[0, 0, 0], sizes=[%dim3_capture, %dim1_capture, 1024], strides=[1, 1, 1] : tensor<?x?x1024xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?x1024xf32>>{%dim3_capture, %dim1_capture}
flow.return
}
- // CHECK-NEXT: return %[[RET0]]
- return %ret0 : tensor<?x?x1024xf32>
+ // CHECK-NEXT: util.return %[[RET0]]
+ util.return %ret0 : tensor<?x?x1024xf32>
}
// -----
-// CHECK-LABEL: func.func @dispatchWithCountRegion
-func.func @dispatchWithCountRegion(%arg0: tensor<4xi32>) -> tensor<4xi32> {
+// CHECK-LABEL: util.func public @dispatchWithCountRegion
+util.func public @dispatchWithCountRegion(%arg0: tensor<4xi32>) -> tensor<4xi32> {
%x = arith.constant 100 : index
%y = arith.constant 50 : index
%0 = flow.dispatch.workgroups[%x, %y](%arg0) : (tensor<4xi32>) -> %arg0 =
@@ -174,5 +174,5 @@
%z = arith.constant 1 : index
flow.return %x_capture, %y_capture, %z : index, index, index
}
- return %0 : tensor<4xi32>
+ util.return %0 : tensor<4xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_consumer.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_consumer.mlir
index bfc5809..2945fca 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_consumer.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_consumer.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions{fuse-pad-with-consumers}))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions{fuse-pad-with-consumers}))" --split-input-file %s | FileCheck %s
-func.func @fuse_with_consumer(%arg0 : tensor<?x?x?x?xf32>, %arg1 : index,
+util.func public @fuse_with_consumer(%arg0 : tensor<?x?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : index, %arg4 : index,
%arg5 : tensor<?x?x?x?xf32>, %arg6 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
%cst = arith.constant 42.0 : f32
@@ -10,9 +10,9 @@
} : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%1 = linalg.conv_2d_nhwc_hwcf ins(%0, %arg5 : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
outs(%arg6 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
- return %1 : tensor<?x?x?x?xf32>
+ util.return %1 : tensor<?x?x?x?xf32>
}
-// CHECK-LABEL: func @fuse_with_consumer
+// CHECK-LABEL: util.func public @fuse_with_consumer
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
@@ -22,4 +22,4 @@
// CHECK-SAME: ins(%[[PADDED]], %[[ARG5]] :
// CHECK-SAME: outs(%[[ARG6]] :
// CHECK: flow.return %[[CONV]]
-// CHECK: return %[[RETURN]]
+// CHECK: util.return %[[RETURN]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_producer.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_producer.mlir
index c54e6ce..541a142 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_producer.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pad_fusion_with_producer.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-flow-form-dispatch-regions{fuse-pad-with-producers}))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-flow-form-dispatch-regions{fuse-pad-with-producers}))" --split-input-file %s | FileCheck %s
-func.func @fuse_pad_with_producer(%arg0 : tensor<?x?x?x?xf32>,
+util.func public @fuse_pad_with_producer(%arg0 : tensor<?x?x?x?xf32>,
%arg1 : tensor<?x?x?x?xf32>, %arg2 : tensor<?x?x?x?xf32>,
%arg3 : tensor<?xf32>, %arg4 : index, %arg5 : index, %arg6 : index,
%arg7 : index) -> tensor<?x?x?x?xf32> {
@@ -33,9 +33,9 @@
^bb0(%b0 : index, %b1 : index, %b2 : index, %b3 : index) :
tensor.yield %cst : f32
} : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
- return %4 : tensor<?x?x?x?xf32>
+ util.return %4 : tensor<?x?x?x?xf32>
}
-// CHECK-LABEL: func @fuse_pad_with_producer(
+// CHECK-LABEL: util.func public @fuse_pad_with_producer(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
@@ -48,4 +48,4 @@
// CHECK-SAME: ins(%[[CONV]], %[[ARG3]]
// CHECK: %[[PADDED:.+]] = tensor.pad %[[GENERIC]]
// CHECK: flow.return %[[PADDED]]
-// CHEKC: return %[[RETURN]]
+// CHEKC: util.return %[[RETURN]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
index fcb9309..5485f59 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
@@ -6,7 +6,7 @@
#map2 = affine_map<(d0, d1) -> (d0, d1)>
#map3 = affine_map<(d0, d1) -> ()>
module {
- func.func @main(%arg0: tensor<833xi32>, %arg1: tensor<833x833xf32>, %arg2: tensor<f32>) -> tensor<f32> {
+ util.func public @main(%arg0: tensor<833xi32>, %arg1: tensor<833x833xf32>, %arg2: tensor<f32>) -> tensor<f32> {
%cst = arith.constant 5.66893432E-4 : f32
%0 = tensor.empty() : tensor<833x833xf32>
%1 = linalg.generic {
@@ -35,7 +35,7 @@
%10 = arith.addf %b1, %b0 : f32
linalg.yield %10 : f32
} -> tensor<f32>
- return %9 : tensor<f32>
+ util.return %9 : tensor<f32>
}
}
// Check that the linalg op with two reduction loops get folded into a single reduction
@@ -49,11 +49,11 @@
// CHECK: func.func @[[FUNC1:[a-zA-Z0-9_x]+]]
// CHECK: linalg.generic
// CHECK-SAME: ["reduction"]
-// CHECK: func.func @main(
+// CHECK: util.func public @main(
// CHECK: %[[T0:.+]] = flow.dispatch @[[EXECUTABLE0]]::@[[FUNC0]]
// CHECK: %[[T1:.+]] = flow.tensor.reshape %[[T0]] : tensor<833x833xf32> -> tensor<693889xf32>
// CHECK: %[[T2:.+]] = flow.dispatch @[[EXECUTABLE1]]::@[[FUNC1]](%[[T1]])
-// CHECK: return %[[T2]]
+// CHECK: util.return %[[T2]]
// -----
@@ -63,7 +63,7 @@
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
module {
- func.func @grouped_quantized_matmul(%arg0: tensor<4096x32x128xi4>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
+ util.func public @grouped_quantized_matmul(%arg0: tensor<4096x32x128xi4>, %arg1: tensor<1x1x32x128xf32>, %arg2: tensor<4096x32x1xf32>, %arg3: tensor<4096x32x1xf32>) -> tensor<1x1x4096xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x1x4096xf32>
%1 = tensor.empty() : tensor<4096x32x128xf32>
@@ -82,7 +82,7 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<1x1x4096xf32>
- return %4 : tensor<1x1x4096xf32>
+ util.return %4 : tensor<1x1x4096xf32>
}
}
// Check that the two linalg.generic ops are fused into the same dispatch
@@ -102,7 +102,7 @@
// CHECK: arith.mulf
// CHECK: arith.addf
// CHECK: flow.dispatch.tensor.store %[[GEN1]]
-// CHECK: func.func @grouped_quantized_matmul(
+// CHECK: util.func public @grouped_quantized_matmul(
// CHECK: %[[T0:.+]] = flow.dispatch @[[EXECUTABLE0]]::@[[FUNC0]]
// CHECK: %[[RS:.+]] = flow.tensor.reshape %[[T0]] : tensor<4096xf32> -> tensor<1x1x4096xf32>
-// CHECK: return %[[RS]]
+// CHECK: util.return %[[RS]]
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/tensor_pad_to_tensor_insert_slice.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/tensor_pad_to_tensor_insert_slice.mlir
index 4ca0ca7..a23c475 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/tensor_pad_to_tensor_insert_slice.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/tensor_pad_to_tensor_insert_slice.mlir
@@ -1,22 +1,20 @@
// RUN: iree-opt --split-input-file --iree-flow-tensor-pad-to-tensor-insert-slice --canonicalize %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-flow-tensor-pad-to-tensor-insert-slice=skip-one-linalg-use-case --canonicalize %s | FileCheck %s --check-prefix=SKIP
-module {
- func.func @tensor_pad(%arg0 : tensor<?x?xf32>, %arg1 : tensor<f32>, %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
- %c0 = arith.constant 0 : index
- %c4 = arith.constant 4 : index
- %c3 = arith.constant 3 : index
- %0 = tensor.extract %arg1[] : tensor<f32>
- %1 = tensor.pad %arg0 low[%c4, %arg2] high[%arg3, %c3] {
- ^bb0(%arg4: index, %arg5: index):
- tensor.yield %0 : f32
- } : tensor<?x?xf32> to tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
- }
+util.func public @tensor_pad(%arg0 : tensor<?x?xf32>, %arg1 : tensor<f32>, %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
+ %c0 = arith.constant 0 : index
+ %c4 = arith.constant 4 : index
+ %c3 = arith.constant 3 : index
+ %0 = tensor.extract %arg1[] : tensor<f32>
+ %1 = tensor.pad %arg0 low[%c4, %arg2] high[%arg3, %c3] {
+ ^bb0(%arg4: index, %arg5: index):
+ tensor.yield %0 : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1 + 4)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 + 3)>
-// CHECK: func.func @tensor_pad
+// CHECK: util.func public @tensor_pad
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<f32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index
@@ -35,25 +33,23 @@
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][4, %[[ARG2]]] [%[[D0]], %[[D1]]] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-module {
- func.func @tensor_pad_static(%arg0: tensor<12x4xf32>, %arg1: tensor<f32>) -> tensor<18x12xf32> {
- %c4 = arith.constant 4 : index
- %c2 = arith.constant 2 : index
- %c5 = arith.constant 5 : index
- %c3 = arith.constant 3 : index
- %0 = tensor.extract %arg1[] : tensor<f32>
- %1 = tensor.pad %arg0 low[%c4, %c5] high[%c2, %c3] {
- ^bb0(%arg2: index, %arg3: index):
- tensor.yield %0 : f32
- } : tensor<12x4xf32> to tensor<18x12xf32>
- return %1 : tensor<18x12xf32>
- }
+util.func public @tensor_pad_static(%arg0: tensor<12x4xf32>, %arg1: tensor<f32>) -> tensor<18x12xf32> {
+ %c4 = arith.constant 4 : index
+ %c2 = arith.constant 2 : index
+ %c5 = arith.constant 5 : index
+ %c3 = arith.constant 3 : index
+ %0 = tensor.extract %arg1[] : tensor<f32>
+ %1 = tensor.pad %arg0 low[%c4, %c5] high[%c2, %c3] {
+ ^bb0(%arg2: index, %arg3: index):
+ tensor.yield %0 : f32
+ } : tensor<12x4xf32> to tensor<18x12xf32>
+ util.return %1 : tensor<18x12xf32>
}
-// CHECK-LABEL: func.func @tensor_pad_static
+// CHECK-LABEL: util.func public @tensor_pad_static
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<12x4xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<f32>
// CHECK-DAG: %[[VAL:.+]] = tensor.extract %[[ARG1]]
@@ -62,11 +58,11 @@
// CHECK-SAME: ins(%[[VAL]] :
// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][4, 5] [12, 4] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @_main(%arg0: tensor<1x33x33x480xf32>, %arg1: tensor<3x3x480x1xf32>) -> tensor<1x33x33x480xf32> {
+util.func public @_main(%arg0: tensor<1x33x33x480xf32>, %arg1: tensor<3x3x480x1xf32>) -> tensor<1x33x33x480xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.pad %arg0 low[0, 4, 4, 0] high[0, 4, 4, 0] {
^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
@@ -76,14 +72,14 @@
%2 = tensor.collapse_shape %arg1 [[0], [1], [2, 3]] : tensor<3x3x480x1xf32> into tensor<3x3x480xf32>
%3 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x33x33x480xf32>) -> tensor<1x33x33x480xf32>
%4 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<4> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%0, %2 : tensor<1x41x41x480xf32>, tensor<3x3x480xf32>) outs(%3 : tensor<1x33x33x480xf32>) -> tensor<1x33x33x480xf32>
- return %4 : tensor<1x33x33x480xf32>
+ util.return %4 : tensor<1x33x33x480xf32>
}
// CHECK-NOT: tensor.pad
// SKIP: tensor.pad
// ----
-func.func @dispatch_dispatch_0_generic_512x1024_f32(
+util.func public @dispatch_dispatch_0_generic_512x1024_f32(
%arg0: !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>,
%arg1: index, %arg2: index, %arg3: index, %arg4: index,
%arg5: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<512x1024xf32>>>>) {
@@ -100,7 +96,7 @@
} : tensor<512x1024xf32> to tensor<?x?xf32>
%11 = iree_linalg_ext.set_encoding %padded : tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<512x1024xf32>>>
flow.dispatch.tensor.store %11, %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<512x1024xf32>>> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<512x1024xf32>>>>{%0, %1}
- return
+ util.return
}
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/top_level_scf_to_cfg.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/top_level_scf_to_cfg.mlir
index 6b4987e..12503dd 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/top_level_scf_to_cfg.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/top_level_scf_to_cfg.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-top-level-scf-to-cfg))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-top-level-scf-to-cfg))" %s | FileCheck %s
// CHECK-LABEL: @generic_nested_for
// While not super recommended, we do have cases of SCF constructs embedded
@@ -6,7 +6,7 @@
// The normal --convert-scf-to-std pass will produce an illegal linalg op
// (multiple basic blocks). The --iree-top-level-scf-to-cfg should not touch it.
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-func.func @generic_nested_for(%arg0: tensor<?x?x?x?xi32>, %arg1: tensor<?x?x?x?xi32>, %out0: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32> {
+util.func public @generic_nested_for(%arg0: tensor<?x?x?x?xi32>, %arg1: tensor<?x?x?x?xi32>, %out0: tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c6 = arith.constant 6 : index
@@ -41,5 +41,5 @@
linalg.yield %27 : i32
} -> tensor<?x?x?x?xi32>
- return %0 : tensor<?x?x?x?xi32>
+ util.return %0 : tensor<?x?x?x?xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
index af63269..8610b28 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
@@ -1,16 +1,16 @@
// RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule -allow-unregistered-dialect -split-input-file | FileCheck %s
-// CHECK-LABEL: func @single_op(
+// CHECK-LABEL: util.func public @single_op(
// CHECK-SAME: %[[arg0:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
-func.func @single_op(%arg0: tensor<?x?xf32>, %s1: index, %s2: index) -> tensor<?x?xf32> {
+util.func public @single_op(%arg0: tensor<?x?xf32>, %s1: index, %s2: index) -> tensor<?x?xf32> {
// CHECK: %[[region:.*]] = flow.dispatch.region -> (tensor<?x?xf32>{%[[s1]], %[[s2]]}) {
// CHECK: %[[slice:.*]] = tensor.extract_slice %[[arg0]]
// CHECK: flow.return %[[slice]]
// CHECK: }
- // CHECK: return %[[region]]
+ // CHECK: util.return %[[region]]
%0 = tensor.extract_slice %arg0 [0, 10] [%s1, %s2] [1, 1]
: tensor<?x?xf32> to tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -23,9 +23,9 @@
// -----
-// CHECK-LABEL: func @clone_preceding(
+// CHECK-LABEL: util.func public @clone_preceding(
// CHECK-SAME: %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
-func.func @clone_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+util.func public @clone_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[arg1]], %[[c0]]
@@ -36,11 +36,11 @@
// CHECK: %[[insert:.*]] = tensor.insert_slice %[[dummy_clone]] into %[[arg1]]
// CHECK: flow.return %[[insert]]
// CHECK: }
- // CHECK: return %[[dummy]], %[[region]]
+ // CHECK: util.return %[[dummy]], %[[region]]
%0 = "test.dummy"() : () -> (tensor<?x?xf32>)
%1 = tensor.insert_slice %0 into %arg1 [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<?x?xf32>
- return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -55,9 +55,9 @@
// -----
-// CHECK-LABEL: func @move_preceding(
+// CHECK-LABEL: util.func public @move_preceding(
// CHECK-SAME: %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
-func.func @move_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+util.func public @move_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[arg1]], %[[c0]]
@@ -67,12 +67,12 @@
// CHECK: %[[insert:.*]] = tensor.insert_slice %[[slice]] into %[[arg1]]
// CHECK: flow.return %[[insert]], %[[slice]]
// CHECK: }
- // CHECK: return %[[region]]#0, %[[region]]#1
+ // CHECK: util.return %[[region]]#0, %[[region]]#1
%0 = tensor.extract_slice %arg0 [0, 10] [%s1, %s2] [1, 1]
: tensor<?x?xf32> to tensor<?x?xf32>
%1 = tensor.insert_slice %0 into %arg1 [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<?x?xf32>
- return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -87,18 +87,18 @@
// -----
-// CHECK-LABEL: func @create_region_and_convert_to_workgroups
+// CHECK-LABEL: util.func public @create_region_and_convert_to_workgroups
// CHECK: tensor.empty()
// CHECK: flow.dispatch.workgroups
// CHECK: linalg.matmul
// CHECK: flow.return
-func.func @create_region_and_convert_to_workgroups(
+util.func public @create_region_and_convert_to_workgroups(
%A: tensor<5x3xf32>, %B: tensor<3x5xf32>) -> tensor<5x5xf32> {
%init = tensor.empty() : tensor<5x5xf32>
%matmul = linalg.matmul
ins(%A, %B : tensor<5x3xf32>, tensor<3x5xf32>)
outs(%init : tensor<5x5xf32>) -> tensor<5x5xf32>
- return %matmul : tensor<5x5xf32>
+ util.return %matmul : tensor<5x5xf32>
}
module attributes { transform.with_named_sequence } {
@@ -112,7 +112,7 @@
// -----
-// CHECK-LABEL: func @clone_multiple_preceding
+// CHECK-LABEL: util.func public @clone_multiple_preceding
// CHECK-DAG: arith.constant
// CHECK-DAG: arith.constant
// CHECK-DAG: tensor.dim
@@ -123,7 +123,7 @@
// CHECK-NEXT: "test.second_user"
// CHECK-NEXT: "test.merge1"
// CHECK-NEXT: "test.merge2"
-func.func @clone_multiple_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>) {
+util.func public @clone_multiple_preceding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>) {
%0 = "test.dummy_op"(%arg0) {__tagged__} : (tensor<?x?xf32>) -> (tensor<?x?xf32>)
%1 = "test.first_user"(%0) {__tagged__} : (tensor<?x?xf32>) -> (tensor<?x?xf32>)
%2 = "test.second_user"(%0) {__tagged__} : (tensor<?x?xf32>) -> (tensor<?x?xf32>)
@@ -132,7 +132,7 @@
%4 = "test.merge2"(%1, %3) {__tagged__} : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)
%5 = tensor.insert_slice %4 into %arg1 [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<?x?xf32>
- return %5 : tensor<?x?xf32>
+ util.return %5 : tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -147,9 +147,9 @@
// -----
-// CHECK-LABEL: func @move_succeeding(
+// CHECK-LABEL: util.func public @move_succeeding(
// CHECK-SAME: %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
-func.func @move_succeeding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+util.func public @move_succeeding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[arg1]], %[[c0]]
@@ -159,12 +159,12 @@
// CHECK: %[[insert:.*]] = tensor.insert_slice %[[slice]] into %[[arg1]]
// CHECK: flow.return %[[slice]], %[[insert]]
// CHECK: }
- // CHECK: return %[[region]]#1, %[[region]]#0
+ // CHECK: util.return %[[region]]#1, %[[region]]#0
%0 = tensor.extract_slice %arg0 [0, 10] [%s1, %s2] [1, 1]
: tensor<?x?xf32> to tensor<?x?xf32>
%1 = tensor.insert_slice %0 into %arg1 [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<?x?xf32>
- return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -179,7 +179,7 @@
// -----
-// CHECK-LABEL: func @move_multiple_succeeding
+// CHECK-LABEL: util.func public @move_multiple_succeeding
// CHECK-NEXT: flow.dispatch.region -> (tensor<50x90xf32>, tensor<50x90xf32>, tensor<50x90xf32>, tensor<50x90xf32>, tensor<50x90xf32>, tensor<600x700xf32>)
// CHECK-NEXT: "test.dummy_op"
// CHECK-NEXT: "test.first_user"
@@ -190,7 +190,7 @@
// CHECK-NEXT: flow.return
// CHECK-NEXT: }
// CHECK-NEXT: "test.third_user"
-func.func @move_multiple_succeeding(%arg0: tensor<50x90xf32>, %arg1: tensor<600x700xf32>) -> (tensor<600x700xf32>, tensor<50x90xf32>) {
+util.func public @move_multiple_succeeding(%arg0: tensor<50x90xf32>, %arg1: tensor<600x700xf32>) -> (tensor<600x700xf32>, tensor<50x90xf32>) {
%0 = "test.dummy_op"(%arg0) : (tensor<50x90xf32>) -> (tensor<50x90xf32>)
%1 = "test.first_user"(%0) {__tagged__} : (tensor<50x90xf32>) -> (tensor<50x90xf32>)
%2 = "test.second_user"(%0) {__tagged__} : (tensor<50x90xf32>) -> (tensor<50x90xf32>)
@@ -199,7 +199,7 @@
%4 = "test.merge2"(%1, %3) {__tagged__} : (tensor<50x90xf32>, tensor<50x90xf32>) -> (tensor<50x90xf32>)
%5 = tensor.insert_slice %4 into %arg1 [5, 16] [50, 90] [1, 1] {__tagged__}
: tensor<50x90xf32> into tensor<600x700xf32>
- return %5, %u : tensor<600x700xf32>, tensor<50x90xf32>
+ util.return %5, %u : tensor<600x700xf32>, tensor<50x90xf32>
}
module attributes { transform.with_named_sequence } {
@@ -214,21 +214,21 @@
// -----
-// CHECK-LABEL: func @clone_succeeding(
+// CHECK-LABEL: util.func public @clone_succeeding(
// CHECK-SAME: %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
-func.func @clone_succeeding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+util.func public @clone_succeeding(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %s1: index, %s2: index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
// CHECK: %[[region:.*]] = flow.dispatch.region -> (tensor<?x?xf32>{%[[s1]], %[[s2]]}) {
// CHECK: %[[slice:.*]] = tensor.extract_slice %[[arg0]]
// CHECK: tensor.insert_slice %[[slice]] into %[[arg1]]
// CHECK: flow.return %[[slice]]
// CHECK: }
// CHECK: %[[insert:.*]] = tensor.insert_slice %[[region]] into %[[arg1]]
- // CHECK: return %[[insert]], %[[region]]
+ // CHECK: util.return %[[insert]], %[[region]]
%0 = tensor.extract_slice %arg0 [0, 10] [%s1, %s2] [1, 1]
: tensor<?x?xf32> to tensor<?x?xf32>
%1 = tensor.insert_slice %0 into %arg1 [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<?x?xf32>
- return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
+ util.return %1, %0 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes { transform.with_named_sequence } {
@@ -245,8 +245,8 @@
// This is a regression for reifyDynamicResultDims.
-// CHECK-LABEL: func @reify_result_dims_regression(
-func.func @reify_result_dims_regression(%s1: index, %s2: index) -> (tensor<4x?xf32>) {
+// CHECK-LABEL: util.func public @reify_result_dims_regression(
+util.func public @reify_result_dims_regression(%s1: index, %s2: index) -> (tensor<4x?xf32>) {
// CHECK: %[[dest:.*]] = "test.dummy_dest"
// CHECK: %[[c1:.*]] = arith.constant 1 : index
// CHECK: %[[dim1:.*]] = tensor.dim %[[dest]], %[[c1]]
@@ -255,7 +255,7 @@
// CHECK: %[[insert:.*]] = tensor.insert_slice %[[src]] into %[[dest]]
// CHECK: flow.return %[[insert]]
// CHECK: }
- // CHECK: return %[[region]]
+ // CHECK: util.return %[[region]]
// This op does not implement any interface for querying dynamic result dims.
// Generate a tensor.dim op.
@@ -263,7 +263,7 @@
%src = "test.dummy_src"() : () -> (tensor<?x?xf32>)
%1 = tensor.insert_slice %src into %dest [5, 16] [%s1, %s2] [1, 1]
: tensor<?x?xf32> into tensor<4x?xf32>
- return %1 : tensor<4x?xf32>
+ util.return %1 : tensor<4x?xf32>
}
module attributes { transform.with_named_sequence } {
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/verify_input_ir.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/verify_input_ir.mlir
index 81c8519..f746eab 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/verify_input_ir.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/verify_input_ir.mlir
@@ -1,21 +1,21 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-verify-input-legality))" --verify-diagnostics %s -split-input-file
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-verify-input-legality))" --verify-diagnostics %s -split-input-file
// expected-error@below {{illegal operations still remain}}
-func.func @check_no_stablehlo(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @check_no_stablehlo(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
// expected-error@+1 {{illegal op still exists}}
%0 = stablehlo.add %arg0, %arg1 : tensor<?x?xf32>
// expected-error@+1 {{illegal op still exists}}
%1 = chlo.broadcast_add %0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
// -----
// expected-error@below {{illegal operations still remain}}
-func.func @check_no_tosa(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @check_no_tosa(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
// expected-error@+1 {{illegal op still exists}}
%0 = tosa.add %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
// -----
@@ -23,17 +23,17 @@
// Note: checking that this is illegal even if the op could be folded. This pass
// shouldn't be modifying the IR.
// expected-error@below {{illegal operations still remain}}
-func.func @check_no_unrealized_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
+util.func public @check_no_unrealized_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
// expected-error@+1 {{illegal op still exists}}
%0 = builtin.unrealized_conversion_cast %arg0 : tensor<?xf32> to memref<?xf32>
// expected-error@+1 {{illegal op still exists}}
%1 = builtin.unrealized_conversion_cast %0 : memref<?xf32> to tensor<?xf32>
- return %1 : tensor<?xf32>
+ util.return %1 : tensor<?xf32>
}
// -----
-func.func @check_linalg_ok(%conv : tensor<1x112x112x16xf32>, %bias : tensor<16xf32>, %init : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> {
+util.func public @check_linalg_ok(%conv : tensor<1x112x112x16xf32>, %bias : tensor<16xf32>, %init : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> {
%result = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d3)>,
@@ -45,5 +45,5 @@
%0 = arith.addf %arg0, %arg1 : f32
linalg.yield %0 : f32
} -> tensor<1x112x112x16xf32>
- return %result : tensor<1x112x112x16xf32>
+ util.return %result : tensor<1x112x112x16xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
index 4cb30f5..8533479 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
@@ -22,13 +22,13 @@
// CHECK-LABEL: @calculateWorkgroups
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device,
// CHECK-SAME: %[[WORKLOAD_0:.+]]: index, %[[WORKLOAD_1:.+]]: index, %[[WORKLOAD_2:.+]]: index)
-func.func @calculateWorkgroups(%device: !hal.device, %workload_0: index, %workload_1: index, %workload_2: index) -> (index, index, index) {
+util.func public @calculateWorkgroups(%device: !hal.device, %workload_0: index, %workload_1: index, %workload_2: index) -> (index, index, index) {
// CHECK-DAG: %[[WORKGROUP_YZ:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[WORKGROUP_X:.+]] = affine.apply
%workgroups:3 = hal.executable.calculate_workgroups
device(%device : !hal.device)
target(@ex::@variant::@dispatch)
workload([%workload_0, %workload_1, %workload_2]) : index, index, index
- // CHECK: return %[[WORKGROUP_X]], %[[WORKGROUP_YZ]], %[[WORKGROUP_YZ]]
- return %workgroups#0, %workgroups#1, %workgroups#2 : index, index, index
+ // CHECK: util.return %[[WORKGROUP_X]], %[[WORKGROUP_YZ]], %[[WORKGROUP_YZ]]
+ util.return %workgroups#0, %workgroups#1, %workgroups#2 : index, index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
index 3d47ef5..baa017e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/allocator_ops.mlir
@@ -1,20 +1,20 @@
// RUN: iree-opt --split-input-file --canonicalize --iree-convert-hal-to-vm %s | FileCheck %s
// CHECK-LABEL: vm.func private @allocatorAllocate
-func.func @allocatorAllocate(%arg0 : !hal.allocator) -> !hal.buffer {
+util.func public @allocatorAllocate(%arg0 : !hal.allocator) -> !hal.buffer {
// CHECK-DAG: %[[SIZE:.+]] = vm.const.i64 1024
%size = arith.constant 1024 : index
// CHECK-DAG: %[[AFFINITY:.+]] = vm.const.i64 -1
%affinity = arith.constant -1 : i64
// CHECK: %ref = vm.call @hal.allocator.allocate(%arg0, %[[AFFINITY]], %c70, %c3075, %[[SIZE]]) : (!vm.ref<!hal.allocator>, i64, i32, i32, i64) -> !vm.ref<!hal.buffer>
%0 = hal.allocator.allocate<%arg0 : !hal.allocator> affinity(%affinity) type("HostLocal") usage("DispatchStorage|Transfer") : !hal.buffer{%size}
- return %0 : !hal.buffer
+ util.return %0 : !hal.buffer
}
// -----
// CHECK-LABEL: vm.func private @allocatorImport
-func.func @allocatorImport(%arg0 : !hal.allocator, %arg1 : !util.buffer) -> (i1, !hal.buffer) {
+util.func public @allocatorImport(%arg0 : !hal.allocator, %arg1 : !util.buffer) -> (i1, !hal.buffer) {
// CHECK-DAG: %[[OFFSET:.+]] = vm.const.i64 128
%offset = arith.constant 128 : index
// CHECK-DAG: %[[LENGTH:.+]] = vm.const.i64 256
@@ -24,6 +24,6 @@
// CHECK: %[[IMPORTED:.+]] = vm.call @hal.allocator.import(%arg0, %c1, %[[AFFINITY]], %c6, %c3, %arg1, %[[OFFSET]], %[[LENGTH]]) : (!vm.ref<!hal.allocator>, i32, i64, i32, i32, !vm.buffer, i64, i64) -> !vm.ref<!hal.buffer>
%did_import, %buffer = hal.allocator.import<%arg0 : !hal.allocator> source(%arg1 : !util.buffer)[%offset, %length] affinity(%affinity) type("HostVisible|HostCoherent") usage("Transfer") : i1, !hal.buffer
// CHECK: %[[DID_IMPORT:.+]] = vm.cmp.nz.ref %[[IMPORTED]]
- // CHECK: return %[[DID_IMPORT]], %[[IMPORTED]]
- return %did_import, %buffer : i1, !hal.buffer
+ // CHECK: vm.return %[[DID_IMPORT]], %[[IMPORTED]]
+ util.return %did_import, %buffer : i1, !hal.buffer
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_ops.mlir
index 55c7f4c..21c76ab 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_ops.mlir
@@ -2,56 +2,56 @@
// CHECK-LABEL: @buffer_subspan
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_subspan(%buffer : !hal.buffer) -> !hal.buffer {
+util.func public @buffer_subspan(%buffer : !hal.buffer) -> !hal.buffer {
%c42 = arith.constant 42 : index
%c43 = arith.constant 43 : index
// CHECK: %[[RET:.+]] = vm.call @hal.buffer.subspan(%[[BUFFER]], %c42, %c43) {nosideeffects} : (!vm.ref<!hal.buffer>, i64, i64) -> !vm.ref<!hal.buffer>
%subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c42, %c43] : !hal.buffer
- // CHECK: return %[[RET]]
- return %subspan: !hal.buffer
+ // CHECK: vm.return %[[RET]]
+ util.return %subspan: !hal.buffer
}
// -----
// CHECK-LABEL: @buffer_load_i8
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_load_i8(%buffer: !hal.buffer) -> i8 {
+util.func public @buffer_load_i8(%buffer: !hal.buffer) -> i8 {
%c64 = arith.constant 64 : index
// CHECK: %[[RET:.+]] = vm.call @hal.buffer.load(%[[BUFFER]], %c64, %c1) : (!vm.ref<!hal.buffer>, i64, i32) -> i32
%0 = hal.buffer.load<%buffer: !hal.buffer>[%c64] : i8
- // CHECK: return %[[RET]]
- return %0 : i8
+ // CHECK: vm.return %[[RET]]
+ util.return %0 : i8
}
// -----
// CHECK-LABEL: @buffer_load_i16
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_load_i16(%buffer: !hal.buffer) -> i16 {
+util.func public @buffer_load_i16(%buffer: !hal.buffer) -> i16 {
%c64 = arith.constant 64 : index
// CHECK: %[[RET:.+]] = vm.call @hal.buffer.load(%[[BUFFER]], %c64, %c2) : (!vm.ref<!hal.buffer>, i64, i32) -> i32
%0 = hal.buffer.load<%buffer: !hal.buffer>[%c64] : i16
- // CHECK: return %[[RET]]
- return %0 : i16
+ // CHECK: vm.return %[[RET]]
+ util.return %0 : i16
}
// -----
// CHECK-LABEL: @buffer_load_i32
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_load_i32(%buffer: !hal.buffer) -> i32 {
+util.func public @buffer_load_i32(%buffer: !hal.buffer) -> i32 {
%c64 = arith.constant 64 : index
// CHECK: %[[RET:.+]] = vm.call @hal.buffer.load(%[[BUFFER]], %c64, %c4) : (!vm.ref<!hal.buffer>, i64, i32) -> i32
%0 = hal.buffer.load<%buffer: !hal.buffer>[%c64] : i32
- // CHECK: return %[[RET]]
- return %0 : i32
+ // CHECK: vm.return %[[RET]]
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @buffer_load_i64
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_load_i64(%buffer: !hal.buffer) -> i64 {
+util.func public @buffer_load_i64(%buffer: !hal.buffer) -> i64 {
%c64 = arith.constant 64 : index
// CHECK-DAG: %[[OFFSET_HI:.+]] = vm.add.i64 %c64, %c4
@@ -65,61 +65,61 @@
// CHECK: %[[RET:.+]] = vm.or.i64 %[[LO_I64]], %[[HI_I64]]
%0 = hal.buffer.load<%buffer: !hal.buffer>[%c64] : i64
- // CHECK: return %[[RET]]
- return %0 : i64
+ // CHECK: vm.return %[[RET]]
+ util.return %0 : i64
}
// -----
// CHECK-LABEL: @buffer_load_f32
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @buffer_load_f32(%buffer: !hal.buffer) -> f32 {
+util.func public @buffer_load_f32(%buffer: !hal.buffer) -> f32 {
%c64 = arith.constant 64 : index
// CHECK: %[[RET_I32:.+]] = vm.call @hal.buffer.load(%[[BUFFER]], %c64, %c4) : (!vm.ref<!hal.buffer>, i64, i32) -> i32
%0 = hal.buffer.load<%buffer: !hal.buffer>[%c64] : f32
// CHECK: %[[RET:.+]] = vm.bitcast.i32.f32 %[[RET_I32]]
- // CHECK: return %[[RET]]
- return %0 : f32
+ // CHECK: vm.return %[[RET]]
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @buffer_store_i8
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[VALUE:.+]]: i32)
-func.func @buffer_store_i8(%buffer: !hal.buffer, %value: i8) {
+util.func public @buffer_store_i8(%buffer: !hal.buffer, %value: i8) {
%c64 = arith.constant 64 : index
// CHECK: vm.call @hal.buffer.store(%[[VALUE]], %[[BUFFER]], %c64, %c1) : (i32, !vm.ref<!hal.buffer>, i64, i32) -> ()
hal.buffer.store<%buffer : !hal.buffer>[%c64] value(%value : i8)
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_store_i16
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[VALUE:.+]]: i32)
-func.func @buffer_store_i16(%buffer: !hal.buffer, %value: i16) {
+util.func public @buffer_store_i16(%buffer: !hal.buffer, %value: i16) {
%c64 = arith.constant 64 : index
// CHECK: vm.call @hal.buffer.store(%[[VALUE]], %[[BUFFER]], %c64, %c2) : (i32, !vm.ref<!hal.buffer>, i64, i32) -> ()
hal.buffer.store<%buffer : !hal.buffer>[%c64] value(%value : i16)
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_store_i32
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[VALUE:.+]]: i32)
-func.func @buffer_store_i32(%buffer: !hal.buffer, %value: i32) {
+util.func public @buffer_store_i32(%buffer: !hal.buffer, %value: i32) {
%c64 = arith.constant 64 : index
// CHECK: vm.call @hal.buffer.store(%[[VALUE]], %[[BUFFER]], %c64, %c4) : (i32, !vm.ref<!hal.buffer>, i64, i32) -> ()
hal.buffer.store<%buffer : !hal.buffer>[%c64] value(%value : i32)
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_store_i64
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[VALUE:.+]]: i64)
-func.func @buffer_store_i64(%buffer: !hal.buffer, %value: i64) {
+util.func public @buffer_store_i64(%buffer: !hal.buffer, %value: i64) {
%c64 = arith.constant 64 : index
// CHECK-DAG: %[[VALUE_LO:.+]] = vm.trunc.i64.i32 %[[VALUE]]
@@ -131,17 +131,17 @@
// CHECK: vm.call @hal.buffer.store(%[[VALUE_HI]], %[[BUFFER]], %[[OFFSET_HI]], %c4) : (i32, !vm.ref<!hal.buffer>, i64, i32) -> ()
hal.buffer.store<%buffer : !hal.buffer>[%c64] value(%value : i64)
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_store_f32
// CHECK-SAME: (%[[BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[VALUE:.+]]: f32)
-func.func @buffer_store_f32(%buffer: !hal.buffer, %value: f32) {
+util.func public @buffer_store_f32(%buffer: !hal.buffer, %value: f32) {
%c64 = arith.constant 64 : index
// CHECK: %[[VALUE_I32:.+]] = vm.bitcast.f32.i32 %[[VALUE]]
// CHECK: vm.call @hal.buffer.store(%[[VALUE_I32]], %[[BUFFER]], %c64, %c4) : (i32, !vm.ref<!hal.buffer>, i64, i32) -> ()
hal.buffer.store<%buffer : !hal.buffer>[%c64] value(%value : f32)
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_view_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_view_ops.mlir
index ff713e6..3d913a1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_view_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/buffer_view_ops.mlir
@@ -1,28 +1,28 @@
// RUN: iree-opt --split-input-file --iree-convert-hal-to-vm --iree-vm-target-index-bits=32 %s | FileCheck %s
// CHECK-LABEL: @element_type
-func.func @element_type() -> i32 {
+util.func public @element_type() -> i32 {
// CHECK: %[[RET:.+]] = vm.const.i32 553648160
%element_type = hal.element_type<f32> : i32
- // CHECK: return %[[RET]]
- return %element_type : i32
+ // CHECK: vm.return %[[RET]]
+ util.return %element_type : i32
}
// -----
// CHECK-LABEL: @encoding_type
-func.func @encoding_type() -> i32 {
+util.func public @encoding_type() -> i32 {
// CHECK: %[[RET:.+]] = vm.const.i32 1
%encoding_type = hal.encoding_type<dense_row_major> : i32
- // CHECK: return %[[RET]]
- return %encoding_type : i32
+ // CHECK: vm.return %[[RET]]
+ util.return %encoding_type : i32
}
// -----
// CHECK-LABEL: vm.func private @buffer_view_dims
// CHECK-SAME: %[[VIEW:.+]]: !vm.ref<!hal.buffer_view>
-func.func @buffer_view_dims(%arg0 : !hal.buffer_view) -> (index, index, index) {
+util.func public @buffer_view_dims(%arg0 : !hal.buffer_view) -> (index, index, index) {
// CHECK-DAG: %[[D0_64:.+]] = vm.call @hal.buffer_view.dim(%[[VIEW]], %zero)
// CHECK-DAG: %[[D1_64:.+]] = vm.call @hal.buffer_view.dim(%[[VIEW]], %c1)
// CHECK-DAG: %[[D2_64:.+]] = vm.call @hal.buffer_view.dim(%[[VIEW]], %c2)
@@ -33,5 +33,5 @@
// CHECK-DAG: %[[D1_32:.+]] = vm.trunc.i64.i32 %[[D1_64]]
// CHECK-DAG: %[[D2_32:.+]] = vm.trunc.i64.i32 %[[D2_64]]
// CHECK-NEXT: vm.return %[[D0_32]], %[[D1_32]], %[[D2_32]]
- return %0, %1, %2 : index, index, index
+ util.return %0, %1, %2 : index, index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/channel_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/channel_ops.mlir
index 749fafa..ebc0f71 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/channel_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/channel_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @channel_create
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64, %[[ID:.+]]: !vm.buffer, %[[GROUP:.+]]: !vm.buffer, %[[RANK:.+]]: i32, %[[COUNT:.+]]: i32) -> !vm.ref<!hal.channel>
-func.func @channel_create(%device: !hal.device, %affinity: i64, %id: !util.buffer, %group: !util.buffer, %rank: i32, %count: i32) -> !hal.channel {
+util.func public @channel_create(%device: !hal.device, %affinity: i64, %id: !util.buffer, %group: !util.buffer, %rank: i32, %count: i32) -> !hal.channel {
// CHECK: %[[FLAGS:.+]] = vm.const.i32.zero
// CHECK: %[[CHANNEL:.+]] = vm.call @hal.channel.create(%[[DEVICE]], %[[AFFINITY]], %[[FLAGS]], %[[ID]], %[[GROUP]], %[[RANK]], %[[COUNT]])
%channel = hal.channel.create device(%device : !hal.device)
@@ -12,32 +12,32 @@
group(%group)
rank(%rank)
count(%count) : !hal.channel
- // CHECK: return %[[CHANNEL]]
- return %channel : !hal.channel
+ // CHECK: vm.return %[[CHANNEL]]
+ util.return %channel : !hal.channel
}
// -----
// CHECK-LABEL: @channel_split
// CHECK-SAME: (%[[BASE_CHANNEL:.+]]: !vm.ref<!hal.channel>, %[[COLOR:.+]]: i32, %[[KEY:.+]]: i32)
-func.func @channel_split(%base_channel: !hal.channel, %color: i32, %key: i32) -> !hal.channel {
+util.func public @channel_split(%base_channel: !hal.channel, %color: i32, %key: i32) -> !hal.channel {
// CHECK: %[[FLAGS:.+]] = vm.const.i32.zero
// CHECK: %[[SPLIT_CHANNEL:.+]] = vm.call @hal.channel.split(%[[BASE_CHANNEL]], %[[COLOR]], %[[KEY]], %[[FLAGS]])
%split_channel = hal.channel.split<%base_channel : !hal.channel>
color(%color)
key(%key)
flags(0) : !hal.channel
- // CHECK: return %[[SPLIT_CHANNEL]]
- return %split_channel : !hal.channel
+ // CHECK: vm.return %[[SPLIT_CHANNEL]]
+ util.return %split_channel : !hal.channel
}
// -----
// CHECK-LABEL: @channel_rank_and_count
// CHECK-SAME: %[[CHANNEL:.+]]: !vm.ref<!hal.channel>
-func.func @channel_rank_and_count(%channel: !hal.channel) -> (i32, i32) {
+util.func public @channel_rank_and_count(%channel: !hal.channel) -> (i32, i32) {
// CHECK: %[[RANK_COUNT:.+]]:2 = vm.call @hal.channel.rank_and_count(%[[CHANNEL]])
%rank, %count = hal.channel.rank_and_count<%channel : !hal.channel> : i32, i32
- // CHECK: return %[[RANK_COUNT]]#0, %[[RANK_COUNT]]#1
- return %rank, %count : i32, i32
+ // CHECK: vm.return %[[RANK_COUNT]]#0, %[[RANK_COUNT]]#1
+ util.return %rank, %count : i32, i32
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/command_buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/command_buffer_ops.mlir
index db6304d..432a76e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/command_buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/command_buffer_ops.mlir
@@ -1,34 +1,34 @@
// RUN: iree-opt --split-input-file --iree-convert-hal-to-vm --canonicalize --iree-vm-target-index-bits=32 %s | FileCheck %s
// CHECK-LABEL: @command_buffer_create
-func.func @command_buffer_create(%arg0: !hal.device) {
+util.func public @command_buffer_create(%arg0: !hal.device) {
// CHECK: %ref = vm.call @hal.command_buffer.create(%arg0, %c1, %c3, %zero) : (!vm.ref<!hal.device>, i32, i32, i32) -> !vm.ref<!hal.command_buffer>
%cmd = hal.command_buffer.create device(%arg0 : !hal.device) mode("OneShot") categories("Transfer|Dispatch") : !hal.command_buffer
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_create_bindings
-func.func @command_buffer_create_bindings(%arg0: !hal.device, %arg1: index) {
+util.func public @command_buffer_create_bindings(%arg0: !hal.device, %arg1: index) {
// CHECK: %ref = vm.call @hal.command_buffer.create(%arg0, %c1, %c3, %arg1) : (!vm.ref<!hal.device>, i32, i32, i32) -> !vm.ref<!hal.command_buffer>
%cmd = hal.command_buffer.create device(%arg0 : !hal.device) mode("OneShot") categories("Transfer|Dispatch") bindings(%arg1) : !hal.command_buffer
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_finalize
-func.func @command_buffer_finalize(%arg0: !hal.command_buffer) {
+util.func public @command_buffer_finalize(%arg0: !hal.command_buffer) {
// CHECK: vm.call @hal.command_buffer.finalize(%arg0) : (!vm.ref<!hal.command_buffer>) -> ()
hal.command_buffer.finalize<%arg0 : !hal.command_buffer>
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_execution_barrier
-func.func @command_buffer_execution_barrier(
+util.func public @command_buffer_execution_barrier(
%arg0: !hal.command_buffer,
%arg1: !hal.buffer
) {
@@ -37,13 +37,13 @@
source("CommandIssue")
target("CommandProcess")
flags("None")
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_fill_buffer_i8
-func.func @command_buffer_fill_buffer_i8(
+util.func public @command_buffer_fill_buffer_i8(
%arg0: !hal.command_buffer,
%arg1: !hal.buffer,
%arg2: i8
@@ -56,13 +56,13 @@
hal.command_buffer.fill_buffer<%arg0 : !hal.command_buffer>
target(%arg1 : !hal.buffer)[%c100, %c200]
pattern(%arg2 : i8)
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_fill_buffer_i16
-func.func @command_buffer_fill_buffer_i16(
+util.func public @command_buffer_fill_buffer_i16(
%arg0: !hal.command_buffer,
%arg1: !hal.buffer,
%arg2: i16
@@ -75,13 +75,13 @@
hal.command_buffer.fill_buffer<%arg0 : !hal.command_buffer>
target(%arg1 : !hal.buffer)[%c100, %c200]
pattern(%arg2 : i16)
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_fill_buffer_i32
-func.func @command_buffer_fill_buffer_i32(
+util.func public @command_buffer_fill_buffer_i32(
%arg0: !hal.command_buffer,
%arg1: !hal.buffer,
%arg2: i32
@@ -93,13 +93,13 @@
hal.command_buffer.fill_buffer<%arg0 : !hal.command_buffer>
target(%arg1 : !hal.buffer)[%c100, %c200]
pattern(%arg2 : i32)
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_copy_buffer
-func.func @command_buffer_copy_buffer(
+util.func public @command_buffer_copy_buffer(
%arg0: !hal.command_buffer,
%arg1: !hal.buffer
) {
@@ -111,7 +111,7 @@
source(%arg1 : !hal.buffer)[%c100]
target(%arg1 : !hal.buffer)[%c200]
length(%c300)
- return
+ util.return
}
// -----
@@ -122,7 +122,7 @@
// CHECK-SAME: %[[PARAM:.+]]: i32,
// CHECK-SAME: %[[SEND_BUFFER:.+]]: !vm.ref<!hal.buffer>, %[[RECV_BUFFER:.+]]: !vm.ref<!hal.buffer>,
// CHECK-SAME: %[[COUNT:.+]]: i32)
-func.func @command_buffer_collective_all_reduce_sum(
+util.func public @command_buffer_collective_all_reduce_sum(
%cmd: !hal.command_buffer,
%channel: !hal.channel,
%param: i32,
@@ -146,7 +146,7 @@
send(%send_buffer : !hal.buffer)[%c10, %c128]
recv(%recv_buffer : !hal.buffer)[%c20, %c256]
count(%count)
- return
+ util.return
}
// -----
@@ -157,7 +157,7 @@
// CHECK-SAME: %[[PARAM:.+]]: i32,
// CHECK-SAME: %[[SEND_BUFFER:.+]]: !vm.ref<!hal.buffer>,
// CHECK-SAME: %[[COUNT:.+]]: i32)
-func.func @command_buffer_collective_send(
+util.func public @command_buffer_collective_send(
%cmd: !hal.command_buffer,
%channel: !hal.channel,
%param: i32,
@@ -179,7 +179,7 @@
param(%param : i32)
send(%send_buffer : !hal.buffer)[%c10, %c128]
count(%count)
- return
+ util.return
}
// -----
@@ -189,7 +189,7 @@
// CHECK-SAME: %[[LAYOUT:.+]]: !vm.ref<!hal.pipeline_layout>,
// CHECK-SAME: %[[BUFFER:.+]]: !vm.ref<!hal.buffer>,
// CHECK-SAME: %[[SLOT:.+]]: i32
-func.func @command_buffer_push_descriptor_set(
+util.func public @command_buffer_push_descriptor_set(
%cmd: !hal.command_buffer,
%layout: !hal.pipeline_layout,
%buffer: !hal.buffer,
@@ -214,13 +214,13 @@
%c0 = (%buffer : !hal.buffer)[%c4096, %c8000],
%c1 = (%slot : index)[%c4, %c4096]
])
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_dispatch
-func.func @command_buffer_dispatch(
+util.func public @command_buffer_dispatch(
%arg0: !hal.command_buffer,
%arg1: !hal.executable
) {
@@ -231,13 +231,13 @@
hal.command_buffer.dispatch<%arg0 : !hal.command_buffer>
target(%arg1 : !hal.executable)[0]
workgroups([%c100, %c200, %c300])
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_dispatch_indirect
-func.func @command_buffer_dispatch_indirect(
+util.func public @command_buffer_dispatch_indirect(
%arg0: !hal.command_buffer,
%arg1: !hal.executable,
%arg2: !hal.buffer
@@ -247,5 +247,5 @@
hal.command_buffer.dispatch.indirect<%arg0 : !hal.command_buffer>
target(%arg1 : !hal.executable)[0]
workgroups(%arg2 : !hal.buffer)[%c100]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/device_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/device_ops.mlir
index ce88e3f..998d1b1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/device_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/device_ops.mlir
@@ -2,88 +2,88 @@
// CHECK-LABEL: @device_allocator
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_allocator(%device: !hal.device) -> !hal.allocator {
+util.func public @device_allocator(%device: !hal.device) -> !hal.allocator {
// CHECK: %ref = vm.call @hal.device.allocator(%[[DEVICE]]) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator>
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
- return %allocator : !hal.allocator
+ util.return %allocator : !hal.allocator
}
// -----
// CHECK-LABEL: @device_query_i64
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i64(%device: !hal.device) -> (i1, i64) {
+util.func public @device_query_i64(%device: !hal.device) -> (i1, i64) {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i64
- // CHECK: return %[[RET]]#0, %[[RET]]#1
- return %ok, %value : i1, i64
+ // CHECK: vm.return %[[RET]]#0, %[[RET]]#1
+ util.return %ok, %value : i1, i64
}
// -----
// CHECK-LABEL: @device_query_i64_default
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i64_default(%device: !hal.device) -> i64 {
+util.func public @device_query_i64_default(%device: !hal.device) -> i64 {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i64 = 123 : i64
// CHECK: %[[OUT:.+]] = vm.select.i64 %[[RET]]#0, %[[RET]]#1, %c123 : i64
- // CHECK: return %[[OUT]]
- return %value : i64
+ // CHECK: vm.return %[[OUT]]
+ util.return %value : i64
}
// -----
// CHECK-LABEL: @device_query_i32
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i32(%device: !hal.device) -> (i1, i32) {
+util.func public @device_query_i32(%device: !hal.device) -> (i1, i32) {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
// CHECK: %[[RET_I32:.+]] = vm.trunc.i64.i32 %[[RET]]#1 : i64 -> i32
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i32
- // CHECK: return %[[RET]]#0, %[[RET_I32]]
- return %ok, %value : i1, i32
+ // CHECK: vm.return %[[RET]]#0, %[[RET_I32]]
+ util.return %ok, %value : i1, i32
}
// -----
// CHECK-LABEL: @device_query_i32_default
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i32_default(%device: !hal.device) -> i32 {
+util.func public @device_query_i32_default(%device: !hal.device) -> i32 {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
// CHECK: %[[RET_I32:.+]] = vm.trunc.i64.i32 %[[RET]]#1 : i64 -> i32
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i32 = 123 : i32
// CHECK: %[[OUT:.+]] = vm.select.i32 %[[RET]]#0, %[[RET_I32]], %c123 : i32
- // CHECK: return %[[OUT]]
- return %value : i32
+ // CHECK: vm.return %[[OUT]]
+ util.return %value : i32
}
// -----
// CHECK-LABEL: @device_query_i1
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i1(%device: !hal.device) -> (i1, i1) {
+util.func public @device_query_i1(%device: !hal.device) -> (i1, i1) {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
// CHECK: %[[RET_I32:.+]] = vm.trunc.i64.i32 %[[RET]]#1 : i64 -> i32
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i1
// CHECK: %[[I1:.+]] = vm.and.i32 %[[RET_I32]], %c1 : i32
- // CHECK: return %[[RET]]#0, %[[I1]]
- return %ok, %value : i1, i1
+ // CHECK: vm.return %[[RET]]#0, %[[I1]]
+ util.return %ok, %value : i1, i1
}
// -----
// CHECK-LABEL: @device_query_i1_default
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @device_query_i1_default(%device: !hal.device) -> i1 {
+util.func public @device_query_i1_default(%device: !hal.device) -> i1 {
// CHECK-DAG: %[[NS:.+]] = vm.rodata.inline "_utf8_sys_
// CHECK-DAG: %[[KEY:.+]] = vm.rodata.inline "_utf8_foo_
// CHECK: %[[RET:.+]]:2 = vm.call @hal.device.query.i64(%[[DEVICE]], %[[NS]], %[[KEY]]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i64)
@@ -91,14 +91,14 @@
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i1 = 1 : i1
// CHECK: %[[I1:.+]] = vm.and.i32 %[[RET_I32]], %c1 : i32
// CHECK: %[[OUT:.+]] = vm.select.i32 %[[RET]]#0, %[[I1]], %c1
- // CHECK: return %[[OUT]]
- return %value : i1
+ // CHECK: vm.return %[[OUT]]
+ util.return %value : i1
}
// -----
// CHECK-LABEL: @device_queue_alloca
-func.func @device_queue_alloca(
+util.func public @device_queue_alloca(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
@@ -117,13 +117,13 @@
pool(%c100_i64)
type(DeviceLocal) usage(Transfer)
: !hal.buffer{%size}
- return %buffer : !hal.buffer
+ util.return %buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @device_queue_dealloca
-func.func @device_queue_dealloca(
+util.func public @device_queue_dealloca(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
@@ -138,13 +138,13 @@
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
buffer(%buffer : !hal.buffer)
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_read
-func.func @device_queue_read(
+util.func public @device_queue_read(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
@@ -173,13 +173,13 @@
target(%target_buffer : !hal.buffer)[%target_offset]
length(%length)
flags(0)
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_execute
-func.func @device_queue_execute(
+util.func public @device_queue_execute(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
@@ -194,17 +194,17 @@
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
commands([%cmd0, %cmd1])
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_flush
-func.func @device_queue_flush(
+util.func public @device_queue_flush(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64)
%device: !hal.device, %affinity: i64) {
// CHECK: vm.call @hal.device.queue.flush(%[[DEVICE]], %[[AFFINITY]])
hal.device.queue.flush<%device : !hal.device>
affinity(%affinity)
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/devices_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/devices_ops.mlir
index b8423ad..c3dbee6 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/devices_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/devices_ops.mlir
@@ -1,18 +1,18 @@
// RUN: iree-opt --split-input-file --iree-convert-hal-to-vm --canonicalize --iree-vm-target-index-bits=32 %s | FileCheck %s
// CHECK-LABEL: @devices_count
-func.func @devices_count() -> index {
+util.func public @devices_count() -> index {
// CHECK: = vm.call @hal.devices.count() {nosideeffects} : () -> i32
%device_count = hal.devices.count : index
- return %device_count : index
+ util.return %device_count : index
}
// -----
// CHECK-LABEL: @devices_get
// CHECK-SAME: (%[[INDEX:.+]]: i32)
-func.func @devices_get(%index: index) -> !hal.device {
+util.func public @devices_get(%index: index) -> !hal.device {
// CHECK: = vm.call @hal.devices.get(%[[INDEX]]) {nosideeffects} : (i32) -> !vm.ref<!hal.device>
%device = hal.devices.get %index : !hal.device
- return %device : !hal.device
+ util.return %device : !hal.device
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
index b107b7b..e449189 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
@@ -14,7 +14,7 @@
}
// CHECK-LABEL: @executableCreate
-func.func @executableCreate(
+util.func public @executableCreate(
// CHECK-SAME: %[[DEV:.+]]: !vm.ref<!hal.device>
%device: !hal.device,
// CHECK-SAME: %[[LAYOUT0:.+]]: !vm.ref<!hal.pipeline_layout>,
@@ -40,7 +40,7 @@
%1 = hal.executable.create device(%device : !hal.device) target(@exe::@binary2) layouts([%layout1, %layout0]) : !hal.executable
// CHECK: vm.return %[[EXE1]], %[[EXE2]]
- return %0, %1 : !hal.executable, !hal.executable
+ util.return %0, %1 : !hal.executable, !hal.executable
}
// -----
@@ -61,7 +61,7 @@
}
// CHECK-LABEL: @multipleExecutables
-func.func @multipleExecutables(
+util.func public @multipleExecutables(
%device: !hal.device,
%layout0: !hal.pipeline_layout,
%layout1: !hal.pipeline_layout
@@ -72,7 +72,7 @@
// CHECK-DAG: %[[FORMAT2:.+]] = vm.rodata.inline "_utf8_format_
// CHECK-DAG: %[[BINARY2:.+]] = vm.const.ref.rodata @exe2_binary2 : !vm.buffer
%1 = hal.executable.create device(%device : !hal.device) target(@exe2::@binary2) layouts([%layout1, %layout0]) : !hal.executable
- return %0, %1 : !hal.executable, !hal.executable
+ util.return %0, %1 : !hal.executable, !hal.executable
}
// -----
@@ -86,7 +86,7 @@
}
// CHECK-LABEL: @executableConstants
-func.func @executableConstants(
+util.func public @executableConstants(
// CHECK-SAME: %[[DEV:.+]]: !vm.ref<!hal.device>
%device: !hal.device,
// CHECK-SAME: %[[LAYOUT:.+]]: !vm.ref<!hal.pipeline_layout>
@@ -117,5 +117,5 @@
constants([%constant0, %c0, %constant1]) : !hal.executable
// CHECK: vm.return %[[EXE]]
- return %0 : !hal.executable
+ util.return %0 : !hal.executable
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/fence_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/fence_ops.mlir
index 6eb570e..995ed77 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/fence_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/fence_ops.mlir
@@ -2,57 +2,57 @@
// CHECK-LABEL: @fence_create
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>)
-func.func @fence_create(%device: !hal.device) -> !hal.fence {
+util.func public @fence_create(%device: !hal.device) -> !hal.fence {
// CHECK: %[[FLAGS:.+]] = vm.const.i32.zero
// CHECK: %[[FENCE:.+]] = vm.call @hal.fence.create(%[[DEVICE]], %[[FLAGS]])
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
// CHECK: vm.return %[[FENCE]]
- return %fence : !hal.fence
+ util.return %fence : !hal.fence
}
// -----
// CHECK-LABEL: @fence_join
// CHECK-SAME: (%[[FENCE0:.+]]: !vm.ref<!hal.fence>, %[[FENCE1:.+]]: !vm.ref<!hal.fence>)
-func.func @fence_join(%fence0: !hal.fence, %fence1: !hal.fence) -> !hal.fence {
+util.func public @fence_join(%fence0: !hal.fence, %fence1: !hal.fence) -> !hal.fence {
// CHECK: %[[JOIN:.+]] = vm.call.variadic @hal.fence.join
// CHECK-SAME: ([%[[FENCE0]], %[[FENCE1]]])
%fence = hal.fence.join at([%fence0, %fence1]) -> !hal.fence
// CHECK: vm.return %[[JOIN]]
- return %fence : !hal.fence
+ util.return %fence : !hal.fence
}
// -----
// CHECK-LABEL: @fence_query
// CHECK-SAME: (%[[FENCE:.+]]: !vm.ref<!hal.fence>)
-func.func @fence_query(%fence: !hal.fence) -> i32 {
+util.func public @fence_query(%fence: !hal.fence) -> i32 {
// CHECK: %[[STATUS:.+]] = vm.call @hal.fence.query(%[[FENCE]])
%status = hal.fence.query<%fence : !hal.fence> : i32
// CHECK: vm.return %[[STATUS]]
- return %status : i32
+ util.return %status : i32
}
// -----
// CHECK-LABEL: @fence_signal
// CHECK-SAME: (%[[FENCE:.+]]: !vm.ref<!hal.fence>)
-func.func @fence_signal(%fence: !hal.fence) {
+util.func public @fence_signal(%fence: !hal.fence) {
// CHECK: vm.call @hal.fence.signal(%[[FENCE]])
hal.fence.signal<%fence : !hal.fence>
// CHECK: vm.return
- return
+ util.return
}
// -----
// CHECK-LABEL: @fence_fail
// CHECK-SAME: (%[[FENCE:.+]]: !vm.ref<!hal.fence>, %[[STATUS:.+]]: i32)
-func.func @fence_fail(%fence: !hal.fence, %status: i32) {
+util.func public @fence_fail(%fence: !hal.fence, %status: i32) {
// CHECK: vm.call @hal.fence.fail(%[[FENCE]], %[[STATUS]])
hal.fence.fail<%fence : !hal.fence> status(%status)
// CHECK: vm.return
- return
+ util.return
}
// -----
@@ -60,10 +60,10 @@
// CHECK-LABEL: @fence_await
// CHECK-SAME: (%[[FENCE0:.+]]: !vm.ref<!hal.fence>, %[[FENCE1:.+]]: !vm.ref<!hal.fence>,
// CHECK-SAME: %[[TIMEOUT:.+]]: i32)
-func.func @fence_await(%fence0: !hal.fence, %fence1: !hal.fence, %timeout: i32) -> i32 {
+util.func public @fence_await(%fence0: !hal.fence, %fence1: !hal.fence, %timeout: i32) -> i32 {
// CHECK: %[[STATUS:.+]] = vm.call.variadic @hal.fence.await
// CHECK-SAME: (%[[TIMEOUT]], [%[[FENCE0]], %[[FENCE1]]])
%status = hal.fence.await until([%fence0, %fence1]) timeout_millis(%timeout) : i32
// CHECK: vm.return %[[STATUS]]
- return %status : i32
+ util.return %status : i32
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/test/shape_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/test/shape_ops.mlir
index 42fc714..d0f59e1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/test/shape_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StandardToHAL/test/shape_ops.mlir
@@ -2,11 +2,11 @@
// CHECK-LABEL: @tensorDim
// CHECK-SAME: (%[[ARG0:.+]]: !hal.buffer_view)
-func.func @tensorDim(%arg0: tensor<4x?xf32>) -> index {
+util.func public @tensorDim(%arg0: tensor<4x?xf32>) -> index {
%c1 = arith.constant 1 : index
// CHECK: %[[DIM:.+]] = hal.buffer_view.dim<%[[ARG0]] : !hal.buffer_view>[1] : index
%dim = tensor.dim %arg0, %c1 : tensor<4x?xf32>
- return %dim : index
+ util.return %dim : index
}
// -----
@@ -16,10 +16,10 @@
// CHECK: @tensorRank
// CHECK-SAME: (%[[ARG0:.+]]: !hal.buffer_view)
-func.func @tensorRank(%arg0: tensor<4x?xf32>) -> index {
+util.func public @tensorRank(%arg0: tensor<4x?xf32>) -> index {
// CHECK-NOT: hal.buffer_view.rank
// CHECK: %[[RANK:.+]] = arith.constant 2
%rank = tensor.rank %arg0 : tensor<4x?xf32>
- // CHECK: return %[[RANK]]
- return %rank : index
+ // CHECK: util.return %[[RANK]]
+ util.return %rank : index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
index 16f9e3e..9a15c91 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
@@ -15,7 +15,6 @@
#include "iree/compiler/Dialect/Stream/IR/StreamTypes.h"
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -818,13 +817,14 @@
newResultTypes))) {
return rewriter.notifyMatchFailure(funcOp, "failed to convert types");
}
- auto newOp = rewriter.replaceOpWithNewOp<func::FuncOp>(
- funcOp, funcOp.getName(),
+ auto newOp = rewriter.replaceOpWithNewOp<IREE::Util::FuncOp>(
+ funcOp, funcOp.getNameAttr(),
rewriter.getFunctionType(newArgTypes, newResultTypes),
- funcOp.getSymVisibilityAttr(),
+ /*tied_operands=*/ArrayAttr{}, funcOp.getSymVisibilityAttr(),
rewriter.getArrayAttr(
ArrayRef<Attribute>(newArgAttrs.data(), newArgAttrs.size())),
- funcOp.getAllResultAttrs());
+ funcOp.getAllResultAttrs(),
+ /*inlining_policy=*/IREE::Util::InliningPolicyAttrInterface{});
newOp->setDialectAttrs(funcOp->getDialectAttrs());
return success();
}
@@ -867,8 +867,9 @@
llvm::append_range(resultTypes, convertedTypes);
}
- rewriter.replaceOpWithNewOp<func::CallOp>(callOp, callOp.getCalleeAttr(),
- resultTypes, operands);
+ rewriter.replaceOpWithNewOp<IREE::Util::CallOp>(
+ callOp, resultTypes, callOp.getCallee(), operands,
+ /*tied_operands=*/ArrayAttr{});
return success();
}
};
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/channel_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/channel_ops.mlir
index d11abe8..3f88bd1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/channel_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/channel_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @channel_create
// CHECK-SAME: () -> !hal.channel
-func.func @channel_create() -> !stream.channel {
+util.func public @channel_create() -> !stream.channel {
// CHECK-DAG: %[[DEVICE:.+]] = hal.devices.get %{{.+}} : !hal.device
// CHECK-DAG: %[[AFFINITY:.+]] = arith.constant 3
// CHECK-DAG: %[[ID:.+]] = util.null : !util.buffer
@@ -10,15 +10,15 @@
// CHECK-DAG: %[[DEFAULT:.+]] = arith.constant -1
// CHECK: %[[CHANNEL:.+]] = hal.channel.create device(%[[DEVICE]] : !hal.device) affinity(%[[AFFINITY]]) flags(0) id(%[[ID]]) group(%[[GROUP]]) rank(%[[DEFAULT]]) count(%[[DEFAULT]]) : !hal.channel
%channel = stream.channel.create on(#hal.affinity.queue<[0, 1]>) group("group") : !stream.channel
- // CHECK: return %[[CHANNEL]]
- return %channel : !stream.channel
+ // CHECK: util.return %[[CHANNEL]]
+ util.return %channel : !stream.channel
}
// -----
// CHECK-LABEL: @channel_split
// CHECK-SAME: (%[[BASE_CHANNEL:.+]]: !hal.channel)
-func.func @channel_split(%base_channel: !stream.channel) {
+util.func public @channel_split(%base_channel: !stream.channel) {
// CHECK-DAG: %[[COLOR_INDEX:.+]] = arith.constant 100
%color = arith.constant 100 : index
// CHECK-DAG: %[[KEY_INDEX:.+]] = arith.constant 101
@@ -27,29 +27,29 @@
// CHECK-DAG: %[[KEY_I32:.+]] = arith.index_cast %[[KEY_INDEX]] : index to i32
// CHECK: %channel = hal.channel.split<%[[BASE_CHANNEL]] : !hal.channel> color(%[[COLOR_I32]]) key(%[[KEY_I32]]) flags(0) : !hal.channel
%split_channel = stream.channel.split %base_channel, %color, %key : !stream.channel -> !stream.channel
- return
+ util.return
}
// -----
// CHECK-LABEL: @channel_rank
// CHECK-SAME: (%[[CHANNEL:.+]]: !hal.channel)
-func.func @channel_rank(%channel: !stream.channel) -> index {
+util.func public @channel_rank(%channel: !stream.channel) -> index {
// CHECK: %[[RANK_I32:.+]], %[[COUNT_I32:.+]] = hal.channel.rank_and_count<%[[CHANNEL]] : !hal.channel> : i32, i32
// CHECK: %[[RANK:.+]] = arith.index_cast %[[RANK_I32]] : i32 to index
%rank = stream.channel.rank %channel : index
- // CHECK: return %[[RANK]]
- return %rank : index
+ // CHECK: util.return %[[RANK]]
+ util.return %rank : index
}
// -----
// CHECK-LABEL: @channel_count
// CHECK-SAME: (%[[CHANNEL:.+]]: !hal.channel) -> index
-func.func @channel_count(%channel: !stream.channel) -> index {
+util.func public @channel_count(%channel: !stream.channel) -> index {
// CHECK: %[[RANK_I32:.+]], %[[COUNT_I32:.+]] = hal.channel.rank_and_count<%[[CHANNEL]] : !hal.channel> : i32, i32
// CHECK: %[[COUNT:.+]] = arith.index_cast %[[COUNT_I32]] : i32 to index
%count = stream.channel.count %channel : index
- // CHECK: return %[[COUNT]]
- return %count : index
+ // CHECK: util.return %[[COUNT]]
+ util.return %count : index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
index d9e6873..e0db957 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
@@ -4,7 +4,7 @@
// the normal sequential execution barriers.
// CHECK-LABEL: @cmdMemoryControl
-func.func @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func public @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
@@ -17,13 +17,13 @@
stream.cmd.discard %arg2[%c0 for %c128] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
// CHECK-NEXT: hal.command_buffer.finalize<%[[CMD]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdFill
-func.func @cmdFill(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func public @cmdFill(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c255_i32 = arith.constant 255 : i32
@@ -36,13 +36,13 @@
// CHECK-NEXT: hal.command_buffer.execution_barrier<%[[CMD]]
} => !stream.timepoint
// CHECK-NEXT: hal.command_buffer.finalize<%[[CMD]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdCopy
-func.func @cmdCopy(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index) -> !stream.timepoint {
+util.func public @cmdCopy(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
@@ -55,13 +55,13 @@
// CHECK-NEXT: hal.command_buffer.execution_barrier<%[[CMD]]
} => !stream.timepoint
// CHECK-NEXT: hal.command_buffer.finalize<%[[CMD]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdCollective
-func.func @cmdCollective(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<transient>, %arg3: index, %arg4: !stream.channel) -> !stream.timepoint {
+util.func public @cmdCollective(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<transient>, %arg3: index, %arg4: !stream.channel) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
@@ -117,7 +117,7 @@
} => !stream.timepoint
// CHECK-NEXT: hal.command_buffer.finalize<%[[CMD]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
@@ -128,7 +128,7 @@
// to.
// CHECK-LABEL: @cmdExecute
-func.func @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
+util.func public @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
@@ -158,8 +158,8 @@
// CHECK-SAME: wait(%arg4)
// CHECK-SAME: signal(%[[SIGNAL_FENCE]])
// CHECK-SAME: commands([%[[CMD]]])
- // CHECK: return %[[SIGNAL_FENCE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[SIGNAL_FENCE]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -211,7 +211,7 @@
}
// CHECK-LABEL: @cmdDispatch
-func.func @cmdDispatch(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<external>, %arg3: index) -> !stream.timepoint {
+util.func public @cmdDispatch(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<external>, %arg3: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -277,7 +277,7 @@
// CHECK: hal.command_buffer.execution_barrier<%[[CMD]]
} => !stream.timepoint
// CHECK-NEXT: hal.command_buffer.finalize<%[[CMD]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
@@ -285,11 +285,11 @@
// Tests conversion of streamable calls and function declarations.
// Expect a command buffer and a buffer + offset + length for each resource.
-// CHECK: func.func private @cmdFunc(!hal.command_buffer, !hal.buffer, index, index, i32, !hal.buffer, index, index, !custom.type, !hal.buffer, index, index)
+// CHECK: util.func private @cmdFunc(%arg0: !hal.command_buffer, %arg1: !hal.buffer, %arg2: index, %arg3: index, %arg4: i32, %arg5: !hal.buffer, %arg6: index, %arg7: index, %arg8: !custom.type, %arg9: !hal.buffer, %arg10: index, %arg11: index)
stream.cmd.func private @cmdFunc(%arg0[%arg1 for %arg2]: !stream.resource<*>, %arg3: i32, %arg4[%arg5 for %arg6]: !stream.resource<*>, %arg7: !custom.type, %arg8[%arg9 for %arg10]: !stream.resource<*>)
// CHECK-LABEL: @cmdCall
-func.func @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<external>, %arg3: !custom.type, %arg4: !stream.resource<external>) -> !stream.timepoint {
+util.func public @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<external>, %arg3: !custom.type, %arg4: !stream.resource<external>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[SIZE0:.+]] = arith.constant 100
%size0 = arith.constant 100 : index
@@ -299,11 +299,11 @@
%size2 = arith.constant 102 : index
// CHECK: %[[COMMAND_BUFFER:.+]] = hal.command_buffer.create
%timepoint = stream.cmd.execute with(%arg0 as %stream0: !stream.resource<external>{%size0}, %arg2 as %stream1: !stream.resource<external>{%size1}, %arg4 as %stream2: !stream.resource<external>{%size2}) {
- // CHECK: call @cmdFunc(%[[COMMAND_BUFFER]], %arg0, %c0, %[[SIZE0]], %arg1, %arg2, %c0, %[[SIZE1]], %arg3, %arg4, %c0, %[[SIZE2]]) :
+ // CHECK: util.call @cmdFunc(%[[COMMAND_BUFFER]], %arg0, %c0, %[[SIZE0]], %arg1, %arg2, %c0, %[[SIZE1]], %arg3, %arg4, %c0, %[[SIZE2]]) :
// CHECK-SAME: (!hal.command_buffer, !hal.buffer, index, index, i32, !hal.buffer, index, index, !custom.type, !hal.buffer, index, index) -> ()
stream.cmd.call @cmdFunc(ro %stream0[%c0 for %size0], %arg1, rw %stream1[%c0 for %size1], %arg3, wo %stream2[%c0 for %size2]) : (!stream.resource<external>{%size0}, i32, !stream.resource<external>{%size1}, !custom.type, !stream.resource<external>{%size2}) -> ()
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
@@ -313,7 +313,7 @@
// the target affinities (0b01 | 0b10 = 0b11 = 3).
// CHECK-LABEL: @cmdExecuteAffinities
-func.func @cmdExecuteAffinities(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
+util.func public @cmdExecuteAffinities(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
@@ -323,5 +323,5 @@
// CHECK: hal.device.queue.execute
// CHECK-SAME: affinity(%c3_i64)
// CHECK-SAME: commands([%[[CMD]]])
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/context_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/context_ops.mlir
index dd43362..5d73951 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/context_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/context_ops.mlir
@@ -1,42 +1,42 @@
// RUN: iree-opt --split-input-file --allow-unregistered-dialect --iree-hal-conversion %s | FileCheck %s
// CHECK-LABEL: @contextResolveAllocator
-func.func @contextResolveAllocator() -> !hal.allocator {
+util.func public @contextResolveAllocator() -> !hal.allocator {
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
// CHECK: %[[ALLOCATOR:.+]] = hal.device.allocator<%[[DEVICE]] : !hal.device> : !hal.allocator
%allocator = stream.context.resolve : !hal.allocator
- // CHECK: return %[[ALLOCATOR]]
- return %allocator : !hal.allocator
+ // CHECK: util.return %[[ALLOCATOR]]
+ util.return %allocator : !hal.allocator
}
// -----
// CHECK-LABEL: @contextResolveDevice
-func.func @contextResolveDevice() -> !hal.device {
+util.func public @contextResolveDevice() -> !hal.device {
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
%device = stream.context.resolve : !hal.device
- // CHECK: return %[[DEVICE]]
- return %device : !hal.device
+ // CHECK: util.return %[[DEVICE]]
+ util.return %device : !hal.device
}
// -----
// CHECK-LABEL: @contextResolveDeviceQueueAffinityAny
-func.func @contextResolveDeviceQueueAffinityAny() -> (!hal.device, i64) {
+util.func public @contextResolveDeviceQueueAffinityAny() -> (!hal.device, i64) {
// CHECK-DAG: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
// CHECK-DAG: %[[QUEUE_AFFINITY:.+]] = arith.constant -1 : i64
%device, %queue_affinity_any = stream.context.resolve on(#hal.affinity.queue<*>) : !hal.device, i64
- // CHECK: return %[[DEVICE]], %[[QUEUE_AFFINITY]]
- return %device, %queue_affinity_any : !hal.device, i64
+ // CHECK: util.return %[[DEVICE]], %[[QUEUE_AFFINITY]]
+ util.return %device, %queue_affinity_any : !hal.device, i64
}
// -----
// CHECK-LABEL: @contextResolveDeviceQueueAffinity45
-func.func @contextResolveDeviceQueueAffinity45() -> (!hal.device, i64) {
+util.func public @contextResolveDeviceQueueAffinity45() -> (!hal.device, i64) {
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
// CHECK-DAG: %[[QUEUE_AFFINITY:.+]] = arith.constant 48 : i64
%device, %queue_affinity_45 = stream.context.resolve on(#hal.affinity.queue<[4, 5]>) : !hal.device, i64
- // CHECK: return %[[DEVICE]], %[[QUEUE_AFFINITY]]
- return %device, %queue_affinity_45 : !hal.device, i64
+ // CHECK: util.return %[[DEVICE]], %[[QUEUE_AFFINITY]]
+ util.return %device, %queue_affinity_45 : !hal.device, i64
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/debug_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/debug_ops.mlir
index a748326..3e3e984 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/debug_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/debug_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @tensorTrace
// CHECK-SAME: (%[[TENSOR0_BUFFER:.+]]: !hal.buffer, %[[TENSOR0_SIZE:.+]]: index, %[[TENSOR1_BUFFER:.+]]: !hal.buffer, %[[TENSOR1_SIZE:.+]]: index, %[[TENSOR1_DIM0:.+]]: index)
-func.func @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index) {
+util.func public @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index) {
// CHECK-DAG: %[[TENSOR0:.+]] = hal.buffer_view.create buffer(%[[TENSOR0_BUFFER]] : !hal.buffer)[%c0{{.*}}, %[[TENSOR0_SIZE]]] shape([%c5, %c3])
// CHECK-DAG: %[[TENSOR1:.+]] = hal.buffer_view.create buffer(%[[TENSOR1_BUFFER]] : !hal.buffer)[%c0{{.*}}, %[[TENSOR1_SIZE]]] shape([%[[TENSOR1_DIM0]], %c5{{.*}}])
// CHECK: hal.buffer_view.trace "FOOBAR" = %[[TENSOR0]], %[[TENSOR1]] : !hal.buffer_view, !hal.buffer_view
@@ -10,5 +10,5 @@
%tensor0 : tensor<5x3xf32> in !stream.resource<staging>{%tensor0_size},
%tensor1 : tensor<?x5xf32>{%tensor1_dim0} in !stream.resource<staging>{%tensor1_size}
]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/file_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/file_ops.mlir
index 9473df7..1182ee4 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/file_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/file_ops.mlir
@@ -2,20 +2,20 @@
// CHECK-LABEL: @file_constant
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer)
-func.func @file_constant(%buffer: !util.buffer) {
+util.func public @file_constant(%buffer: !util.buffer) {
%c0 = arith.constant 0 : index
%c1088 = arith.constant 1088 : index
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
// CHECK: = hal.ex.file.from_memory device(%[[DEVICE]] : !hal.device) affinity(%c-1_i64) access(Read) buffer(%[[BUFFER]] : !util.buffer)[%c0 for %c1088] flags(%c0_i32) : !hal.file
%file = stream.file.constant %buffer[%c0 for %c1088] : !util.buffer{%c1088} -> !stream.file
- return
+ util.return
}
// -----
// CHECK-LABEL: @file_read
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[FILE:.+]]: !hal.file, %[[RESOURCE:.+]]: !hal.buffer)
-func.func @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
+util.func public @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%c1088 = arith.constant 1088 : index
@@ -23,15 +23,15 @@
// CHECK: %[[SIGNAL:.+]] = hal.fence.create
// CHECK: hal.device.queue.read<%[[DEVICE]] : !hal.device> affinity(%c-1_i64) wait(%[[WAIT]]) signal(%[[SIGNAL]]) source(%[[FILE]] : !hal.file)[%c0_i64] target(%[[RESOURCE]] : !hal.buffer)[%c0] length(%c1088) flags(0)
%signal = stream.file.read await(%wait) => %file[%c0_i64], %resource[%c0], %c1088 : !stream.file -> !stream.resource<variable>{%c1088} => !stream.timepoint
- // CHECK: return %[[SIGNAL]]
- return %signal : !stream.timepoint
+ // CHECK: util.return %[[SIGNAL]]
+ util.return %signal : !stream.timepoint
}
// -----
// CHECK-LABEL: @file_write
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[FILE:.+]]: !hal.file, %[[RESOURCE:.+]]: !hal.buffer)
-func.func @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
+util.func public @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%c1088 = arith.constant 1088 : index
@@ -39,6 +39,6 @@
// CHECK: %[[SIGNAL:.+]] = hal.fence.create
// CHECK: hal.device.queue.write<%[[DEVICE]] : !hal.device> affinity(%c-1_i64) wait(%[[WAIT]]) signal(%[[SIGNAL]]) source(%[[RESOURCE]] : !hal.buffer)[%c0] target(%[[FILE]] : !hal.file)[%c0_i64] length(%c1088) flags(0)
%signal = stream.file.write await(%wait) => %resource[%c0], %file[%c0_i64], %c1088 : !stream.resource<variable>{%c1088} -> !stream.file => !stream.timepoint
- // CHECK: return %[[SIGNAL]]
- return %signal : !stream.timepoint
+ // CHECK: util.return %[[SIGNAL]]
+ util.return %signal : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/resource_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/resource_ops.mlir
index 88cb014..6af93ee 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/resource_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/resource_ops.mlir
@@ -1,21 +1,21 @@
// RUN: iree-opt --split-input-file --iree-hal-conversion %s | FileCheck %s
// CHECK-LABEL: @resourceAlloc
-func.func @resourceAlloc(%arg0: index) -> !stream.resource<transient> {
+util.func public @resourceAlloc(%arg0: index) -> !stream.resource<transient> {
// CHECK: %[[RET0:.+]] = hal.allocator.allocate
// CHECK-SAME: type("DeviceVisible|DeviceLocal")
// CHECK-SAME: usage("{{.+}}Transfer{{.+}}Dispatch{{.+}}")
// CHECK-SAME: : !hal.buffer{%arg0}
%0 = stream.resource.alloc uninitialized : !stream.resource<transient>{%arg0}
- // CHECK: return %[[RET0]]
- return %0 : !stream.resource<transient>
+ // CHECK: util.return %[[RET0]]
+ util.return %0 : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @resourceAlloca
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @resourceAlloca(%size: index) -> (!stream.resource<transient>, !stream.timepoint) {
+util.func public @resourceAlloca(%size: index) -> (!stream.resource<transient>, !stream.timepoint) {
// CHECK: %[[WAIT_FENCE:.+]] = util.null : !hal.fence
// CHECK: %[[SIGNAL_FENCE:.+]] = hal.fence.create
// CHECK: %[[RET0:.+]] = hal.device.queue.alloca
@@ -27,15 +27,15 @@
// CHECK-SAME: usage("{{.+}}Transfer{{.+}}Dispatch{{.+}}")
// CHECK-SAME: : !hal.buffer{%[[SIZE]]}
%0:2 = stream.resource.alloca uninitialized : !stream.resource<transient>{%size} => !stream.timepoint
- // CHECK: return %[[RET0]], %[[SIGNAL_FENCE]]
- return %0#0, %0#1 : !stream.resource<transient>, !stream.timepoint
+ // CHECK: util.return %[[RET0]], %[[SIGNAL_FENCE]]
+ util.return %0#0, %0#1 : !stream.resource<transient>, !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceAllocaAwait
// CHECK-SAME: (%[[SIZE:.+]]: index, %[[WAIT_FENCE:.+]]: !hal.fence)
-func.func @resourceAllocaAwait(%size: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<transient>, !stream.timepoint) {
+util.func public @resourceAllocaAwait(%size: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<transient>, !stream.timepoint) {
// CHECK: %[[SIGNAL_FENCE:.+]] = hal.fence.create
// CHECK: %[[RET0:.+]] = hal.device.queue.alloca
// CHECK-SAME: affinity(%c-1
@@ -46,15 +46,15 @@
// CHECK-SAME: usage("{{.+}}Transfer{{.+}}Dispatch{{.+}}")
// CHECK-SAME: : !hal.buffer{%[[SIZE]]}
%0:2 = stream.resource.alloca uninitialized await(%await_timepoint) => !stream.resource<transient>{%size} => !stream.timepoint
- // CHECK: return %[[RET0]], %[[SIGNAL_FENCE]]
- return %0#0, %0#1 : !stream.resource<transient>, !stream.timepoint
+ // CHECK: util.return %[[RET0]], %[[SIGNAL_FENCE]]
+ util.return %0#0, %0#1 : !stream.resource<transient>, !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceDealloca
// CHECK-SAME: (%[[SIZE:.+]]: index, %[[RESOURCE:.+]]: !hal.buffer)
-func.func @resourceDealloca(%size: index, %resource: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @resourceDealloca(%size: index, %resource: !stream.resource<transient>) -> !stream.timepoint {
// CHECK: %[[WAIT_FENCE:.+]] = util.null : !hal.fence
// CHECK: %[[SIGNAL_FENCE:.+]] = hal.fence.create
// CHECK: hal.device.queue.dealloca
@@ -63,8 +63,8 @@
// CHECK-SAME: signal(%[[SIGNAL_FENCE]])
// CHECK-SAME: buffer(%[[RESOURCE]] : !hal.buffer)
%0 = stream.resource.dealloca %resource : !stream.resource<transient>{%size} => !stream.timepoint
- // CHECK: return %[[SIGNAL_FENCE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[SIGNAL_FENCE]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -73,7 +73,7 @@
// CHECK-LABEL: @resourceDeallocaAwait
// CHECK-SAME: (%[[SIZE:.+]]: index, %[[RESOURCE:.+]]: !hal.buffer, %[[WAIT_FENCE:.+]]: !hal.fence)
-func.func @resourceDeallocaAwait(%size: index, %resource: !stream.resource<transient>, %await_timepoint: !stream.timepoint) -> !stream.timepoint {
+util.func public @resourceDeallocaAwait(%size: index, %resource: !stream.resource<transient>, %await_timepoint: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[SIGNAL_FENCE:.+]] = hal.fence.create
// CHECK: hal.device.queue.dealloca
// CHECK-SAME: affinity(%c-1
@@ -81,24 +81,24 @@
// CHECK-SAME: signal(%[[SIGNAL_FENCE]])
// CHECK-SAME: buffer(%[[RESOURCE]] : !hal.buffer)
%0 = stream.resource.dealloca await(%await_timepoint) => %resource : !stream.resource<transient>{%size} => !stream.timepoint
- // CHECK: return %[[SIGNAL_FENCE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[SIGNAL_FENCE]]
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceSize
-func.func @resourceSize(%arg0: !stream.resource<transient>) -> index {
+util.func public @resourceSize(%arg0: !stream.resource<transient>) -> index {
// CHECK: %[[SIZE:.+]] = hal.buffer.length<%arg0 : !hal.buffer> : index
%0 = stream.resource.size %arg0 : !stream.resource<transient>
- // CHECK: return %[[SIZE]]
- return %0 : index
+ // CHECK: util.return %[[SIZE]]
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @resourceTryMap
-func.func @resourceTryMap(%arg0: !util.buffer) -> (i1, !stream.resource<constant>) {
+util.func public @resourceTryMap(%arg0: !util.buffer) -> (i1, !stream.resource<constant>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[DID_IMPORT:.+]], %[[IMPORTED:.+]] = hal.allocator.import
@@ -106,40 +106,40 @@
// CHECK-SAME: type("DeviceVisible|DeviceLocal")
// CHECK-SAME: usage("{{.+}}Transfer{{.+}}Dispatch{{.+}}SharingImmutable") : i1, !hal.
%did_map, %mapping = stream.resource.try_map %arg0[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c128}
- // CHECK: return %[[DID_IMPORT]], %[[IMPORTED]]
- return %did_map, %mapping : i1, !stream.resource<constant>
+ // CHECK: util.return %[[DID_IMPORT]], %[[IMPORTED]]
+ util.return %did_map, %mapping : i1, !stream.resource<constant>
}
// -----
// CHECK-LABEL: @resourceLoad
-func.func @resourceLoad(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
+util.func public @resourceLoad(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
%c4 = arith.constant 4 : index
// CHECK: %[[RET0:.+]] = hal.buffer.load<%arg0 : !hal.buffer>[%c4] : i32
%0 = stream.resource.load %arg0[%c4] : !stream.resource<staging>{%arg1} -> i32
- // CHECK: return %[[RET0]]
- return %0 : i32
+ // CHECK: util.return %[[RET0]]
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @resourceStore
-func.func @resourceStore(%arg0: !stream.resource<staging>, %arg1: index) {
+util.func public @resourceStore(%arg0: !stream.resource<staging>, %arg1: index) {
%c4 = arith.constant 4 : index
%c123_i32 = arith.constant 123 : i32
// CHECK: hal.buffer.store<%arg0 : !hal.buffer>[%c4] value(%c123_i32 : i32)
stream.resource.store %c123_i32, %arg0[%c4] : i32 -> !stream.resource<staging>{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @resourceSubview
-func.func @resourceSubview(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
+util.func public @resourceSubview(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
// CHECK: %[[RET0:.+]] = hal.buffer.subspan<%arg0 : !hal.buffer>[%c128, %c256] : !hal.buffer
%0 = stream.resource.subview %arg0[%c128] : !stream.resource<transient>{%arg1} -> !stream.resource<transient>{%c256}
- // CHECK: return %[[RET0]]
- return %0 : !stream.resource<transient>
+ // CHECK: util.return %[[RET0]]
+ util.return %0 : !stream.resource<transient>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/timepoint_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/timepoint_ops.mlir
index cca49b1..8a7b691 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/timepoint_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/timepoint_ops.mlir
@@ -2,86 +2,86 @@
// CHECK-LABEL: util.global private mutable @rwTimepoint : !hal.fence
util.global private mutable @rwTimepoint = #stream.timepoint<immediate>
-// CHECK: func.func @globalTimepoint(%arg0: !hal.fence) -> !hal.fence
-func.func @globalTimepoint(%arg0: !stream.timepoint) -> !stream.timepoint {
+// CHECK: util.func public @globalTimepoint(%arg0: !hal.fence) -> !hal.fence
+util.func public @globalTimepoint(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK: util.global.store %arg0, @rwTimepoint
util.global.store %arg0, @rwTimepoint : !stream.timepoint
// CHECK: %[[VALUE:.+]] = util.global.load @rwTimepoint
%value = util.global.load @rwTimepoint : !stream.timepoint
- // CHECK: return %[[VALUE]]
- return %value : !stream.timepoint
+ // CHECK: util.return %[[VALUE]]
+ util.return %value : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointImmediate
-func.func @timepointImmediate() -> !stream.timepoint {
+util.func public @timepointImmediate() -> !stream.timepoint {
// CHECK: %[[FENCE:.+]] = util.null : !hal.fence
%0 = stream.timepoint.immediate => !stream.timepoint
- // CHECK: return %[[FENCE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[FENCE]]
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointImportFence
-func.func @timepointImportFence(%arg0: !hal.fence) -> !stream.timepoint {
+util.func public @timepointImportFence(%arg0: !hal.fence) -> !stream.timepoint {
%0 = stream.timepoint.import %arg0 : (!hal.fence) => !stream.timepoint
- // CHECK: return %arg0
- return %0 : !stream.timepoint
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointExportFence
-func.func @timepointExportFence(%arg0: !stream.timepoint) -> !hal.fence {
+util.func public @timepointExportFence(%arg0: !stream.timepoint) -> !hal.fence {
%0 = stream.timepoint.export %arg0 => (!hal.fence)
- // CHECK: return %arg0
- return %0 : !hal.fence
+ // CHECK: util.return %arg0
+ util.return %0 : !hal.fence
}
// -----
// CHECK-LABEL: @timepointChainExternal
// CHECK-SAME: (%[[TIMEPOINT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence)
-func.func @timepointChainExternal(%timepoint: !stream.timepoint, %signal: !hal.fence) {
+util.func public @timepointChainExternal(%timepoint: !stream.timepoint, %signal: !hal.fence) {
// CHECK: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
// CHECK: hal.device.queue.execute<%[[DEVICE]] : !hal.device> affinity(%c-1_i64) wait(%[[TIMEPOINT]]) signal(%[[SIGNAL]])
stream.timepoint.chain_external %timepoint => (%signal : !hal.fence)
- return
+ util.return
}
// -----
// CHECK-LABEL: @timepointJoin
-func.func @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
+util.func public @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[FENCE:.+]] = hal.fence.join at([%arg0, %arg1]) -> !hal.fence
%0 = stream.timepoint.join max(%arg0, %arg1) => !stream.timepoint
- // CHECK: return %[[FENCE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[FENCE]]
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointBarrier
// CHECK-SAME: (%[[R0:.+]]: !hal.buffer) -> (!hal.buffer, !hal.fence)
-func.func @timepointBarrier(%r0: !stream.resource<external>) -> (!stream.resource<external>, !stream.timepoint) {
+util.func public @timepointBarrier(%r0: !stream.resource<external>) -> (!stream.resource<external>, !stream.timepoint) {
%c128 = arith.constant 128 : index
// CHECK: %[[R1T:.+]] = util.null : !hal.fence
%r1, %r1t = stream.timepoint.barrier %r0 : !stream.resource<external>{%c128} => !stream.timepoint
- // CHECK: return %[[R0]], %[[R1T]]
- return %r1, %r1t : !stream.resource<external>, !stream.timepoint
+ // CHECK: util.return %[[R0]], %[[R1T]]
+ util.return %r1, %r1t : !stream.resource<external>, !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointAwait
-func.func @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
+util.func public @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: %[[WAIT_OK:.+]] = hal.fence.await until([%arg0]) timeout_millis(%c-1_i32) : i32
// CHECK-NEXT: util.status.check_ok %[[WAIT_OK]]
%0:2 = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
- // CHECK: return %arg1, %arg2
- return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
+ // CHECK: util.return %arg1, %arg2
+ util.return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/transfer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/transfer_ops.mlir
index f594e7c..1dbcc24 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/transfer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/transfer_ops.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --iree-hal-conversion %s | FileCheck %s
// CHECK-LABEL: @tensorImportBuffer
-func.func @tensorImportBuffer(%arg0: !hal.buffer, %arg1: index) -> !stream.resource<external> {
+util.func public @tensorImportBuffer(%arg0: !hal.buffer, %arg1: index) -> !stream.resource<external> {
%c20 = arith.constant 20 : index
// CHECK-DAG: %[[ALLOCATOR:.+]] = hal.device.allocator
// CHECK: hal.buffer.assert<%arg0 : !hal.buffer>
@@ -11,8 +11,8 @@
// CHECK-SAME: type(DeviceVisible)
// CHECK-SAME: usage("Transfer{{.+}}Dispatch{{.+}}")
%0 = stream.tensor.import %arg0 : !hal.buffer -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
- // CHECK: return %arg0
- return %0 : !stream.resource<external>
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.resource<external>
}
// -----
@@ -22,7 +22,7 @@
// buffer itself.
// CHECK-LABEL: @tensorImportBufferView
-func.func @tensorImportBufferView(%arg0: !hal.buffer_view, %arg1: index) -> !stream.resource<external> {
+util.func public @tensorImportBufferView(%arg0: !hal.buffer_view, %arg1: index) -> !stream.resource<external> {
%c20 = arith.constant 20 : index
// CHECK-DAG: %[[BUFFER:.+]] = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
// CHECK-DAG: %[[ALLOCATOR:.+]] = hal.device.allocator
@@ -33,24 +33,24 @@
// CHECK-SAME: type(DeviceVisible)
// CHECK-SAME: usage("Transfer{{.+}}Dispatch{{.+}}")
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
- // CHECK: return %[[BUFFER]]
- return %0 : !stream.resource<external>
+ // CHECK: util.return %[[BUFFER]]
+ util.return %0 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @tensorExportBuffer
-func.func @tensorExportBuffer(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer {
+util.func public @tensorExportBuffer(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer {
%c200 = arith.constant 200 : index
%0 = stream.tensor.export %arg0 : tensor<?x1x10xf32>{%arg1} in !stream.resource<external>{%c200} -> !hal.buffer
- // CHECK: return %arg0 : !hal.buffer
- return %0 : !hal.buffer
+ // CHECK: util.return %arg0 : !hal.buffer
+ util.return %0 : !hal.buffer
}
// -----
// CHECK-LABEL: @tensorExportBufferView
-func.func @tensorExportBufferView(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer_view {
+util.func public @tensorExportBufferView(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer_view {
%c200 = arith.constant 200 : index
// CHECK-DAG: %[[ELEMENT_TYPE:.+]] = hal.element_type<f32> : i32
// CHECK-DAG: %[[ENCODING_TYPE:.+]] = hal.encoding_type<dense_row_major> : i32
@@ -61,6 +61,6 @@
// CHECK-SAME: encoding(%[[ENCODING_TYPE]])
// CHECK-SAME: : !hal.buffer_view
%0 = stream.tensor.export %arg0 : tensor<?x1x10xf32>{%arg1} in !stream.resource<external>{%c200} -> !hal.buffer_view
- // CHECK: return %[[VIEW]]
- return %0 : !hal.buffer_view
+ // CHECK: util.return %[[VIEW]]
+ util.return %0 : !hal.buffer_view
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/test/global_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/test/global_ops.mlir
index b25d3c2..0686f0e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/test/global_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/UtilToHAL/test/global_ops.mlir
@@ -9,11 +9,11 @@
// CHECK-LABEL: @resourceGlobals
// CHECK-SAME: (%[[ARG0:.+]]: !hal.buffer) -> !hal.buffer
-func.func private @resourceGlobals(%arg0: !stream.resource<variable>) -> !stream.resource<variable> {
+util.func private @resourceGlobals(%arg0: !stream.resource<variable>) -> !stream.resource<variable> {
// CHECK: util.global.store %[[ARG0]], @resource : !hal.buffer
util.global.store %arg0, @resource : !stream.resource<variable>
// CHECK: %[[VALUE:.+]] = util.global.load @resource : !hal.buffer
%value = util.global.load @resource : !stream.resource<variable>
- // CHECK: return %[[VALUE]]
- return %value : !stream.resource<variable>
+ // CHECK: util.return %[[VALUE]]
+ util.return %value : !stream.resource<variable>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/allocator_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/allocator_ops.mlir
index cd35bf6..628a903 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/allocator_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/allocator_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @allocator_allocate
// CHECK-SAME: (%[[ALLOCATOR:.+]]: !hal.allocator)
-func.func @allocator_allocate(%allocator: !hal.allocator) {
+util.func public @allocator_allocate(%allocator: !hal.allocator) {
// CHECK-DAG: %[[AFFINITY:.+]] = arith.constant -1
%affinity = arith.constant -1 : i64
// CHECK-DAG: %[[SIZE:.+]] = arith.constant 123
@@ -13,14 +13,14 @@
// CHECK-SAME: : !hal.buffer{%[[SIZE]]}
%ref = hal.allocator.allocate<%allocator : !hal.allocator>
affinity(%affinity) type(HostLocal) usage(Transfer) : !hal.buffer{%size}
- return
+ util.return
}
// -----
// CHECK-LABEL: @allocator_import
// CHECK-SAME: %[[ALLOCATOR:.+]]: !hal.allocator
-func.func @allocator_import(%allocator: !hal.allocator, %arg1: !util.buffer) {
+util.func public @allocator_import(%allocator: !hal.allocator, %arg1: !util.buffer) {
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 100
%offset = arith.constant 100 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 200
@@ -36,5 +36,5 @@
%ok, %ref = hal.allocator.import<%allocator : !hal.allocator>
source(%arg1 : !util.buffer)[%offset, %length]
affinity(%affinity) type(DeviceLocal) usage(Transfer) : i1, !hal.buffer
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_ops.mlir
index 88b51b5..278ff8e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_ops.mlir
@@ -1,44 +1,44 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @buffer_subspan
-func.func @buffer_subspan(%arg0: !hal.buffer) -> !hal.buffer {
+util.func public @buffer_subspan(%arg0: !hal.buffer) -> !hal.buffer {
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 100
%offset = arith.constant 100 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 200
%length = arith.constant 200 : index
// CHECK: %buffer = hal.buffer.subspan<%arg0 : !hal.buffer>[%[[OFFSET]], %[[LENGTH]]] : !hal.buffer
%buffer = hal.buffer.subspan<%arg0 : !hal.buffer>[%offset, %length] : !hal.buffer
- return %buffer : !hal.buffer
+ util.return %buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @buffer_length
-func.func @buffer_length(%arg0: !hal.buffer) -> index {
+util.func public @buffer_length(%arg0: !hal.buffer) -> index {
// CHECK: hal.buffer.length<%arg0 : !hal.buffer> : index
%length = hal.buffer.length<%arg0 : !hal.buffer> : index
- return %length : index
+ util.return %length : index
}
// -----
// CHECK-LABEL: @buffer_load
-func.func @buffer_load(%arg0: !hal.buffer) -> i32 {
+util.func public @buffer_load(%arg0: !hal.buffer) -> i32 {
// CHECK-DAG: %[[SRC_OFFSET:.+]] = arith.constant 100
%src_offset = arith.constant 100 : index
// CHECK: %[[VAL:.+]] = hal.buffer.load<%arg0 : !hal.buffer>[%[[SRC_OFFSET]]] : i32
%1 = hal.buffer.load<%arg0 : !hal.buffer>[%src_offset] : i32
- // CHECK-NEXT: return %[[VAL]]
- return %1 : i32
+ // CHECK-NEXT: util.return %[[VAL]]
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @buffer_store
-func.func @buffer_store(%arg0: !hal.buffer, %arg1: i32) {
+util.func public @buffer_store(%arg0: !hal.buffer, %arg1: i32) {
// CHECK-DAG: %[[DST_OFFSET:.+]] = arith.constant 100
%dst_offset = arith.constant 100 : index
// CHECK: hal.buffer.store<%arg0 : !hal.buffer>[%[[DST_OFFSET]]] value(%arg1 : i32)
hal.buffer.store<%arg0 : !hal.buffer>[%dst_offset] value(%arg1 : i32)
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_folding.mlir
index b2ec3ce..8d94bed 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_folding.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @FoldBufferViewCreateSubspan
// CHECK-SAME: (%[[BASE_BUFFER:.+]]: !hal.buffer, %[[SUBSPAN_OFFSET:.+]]: index, %[[SUBSPAN_LENGTH:.+]]: index)
-func.func @FoldBufferViewCreateSubspan(%base_buffer: !hal.buffer, %subspan_offset: index, %subspan_length: index) -> !hal.buffer_view {
+util.func public @FoldBufferViewCreateSubspan(%base_buffer: !hal.buffer, %subspan_offset: index, %subspan_length: index) -> !hal.buffer_view {
%subspan = hal.buffer.subspan<%base_buffer : !hal.buffer>[%subspan_offset, %subspan_length] : !hal.buffer
// CHECK-DAG: %[[VIEW_OFFSET:.+]] = arith.constant 512
%view_offset = arith.constant 512 : index
@@ -18,5 +18,5 @@
shape([%dim0])
type(%type)
encoding(%encoding) : !hal.buffer_view
- return %view : !hal.buffer_view
+ util.return %view : !hal.buffer_view
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_ops.mlir
index 9be19f2..df6848a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/buffer_view_ops.mlir
@@ -1,27 +1,27 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @element_type
-func.func @element_type() -> i32 {
+util.func public @element_type() -> i32 {
// CHECK: %[[RET:.+]] = hal.element_type<f32> : i32
%element_type = hal.element_type<f32> : i32
- // CHECK: return %[[RET]]
- return %element_type : i32
+ // CHECK: util.return %[[RET]]
+ util.return %element_type : i32
}
// -----
// CHECK-LABEL: @encoding_type
-func.func @encoding_type() -> i32 {
+util.func public @encoding_type() -> i32 {
// CHECK: %[[RET:.+]] = hal.encoding_type<dense_row_major> : i32
%encoding_type = hal.encoding_type<dense_row_major> : i32
- // CHECK: return %[[RET]]
- return %encoding_type : i32
+ // CHECK: util.return %[[RET]]
+ util.return %encoding_type : i32
}
// -----
// CHECK-LABEL: @buffer_view_create
-func.func @buffer_view_create(%arg0: !hal.buffer, %arg1: index, %arg2: index) -> !hal.buffer_view {
+util.func public @buffer_view_create(%arg0: !hal.buffer, %arg1: index, %arg2: index) -> !hal.buffer_view {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c1_i32 = arith.constant 1 : i32
@@ -35,25 +35,25 @@
shape([%arg1, %arg2])
type(%c32_i32)
encoding(%c1_i32) : !hal.buffer_view
- return %view : !hal.buffer_view
+ util.return %view : !hal.buffer_view
}
// -----
// CHECK-LABEL: @buffer_view_buffer
-func.func @buffer_view_buffer(%arg0: !hal.buffer_view) -> !hal.buffer {
+util.func public @buffer_view_buffer(%arg0: !hal.buffer_view) -> !hal.buffer {
// CHECK: %buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
- return %buffer : !hal.buffer
+ util.return %buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @buffer_view_shape_queries
-func.func @buffer_view_shape_queries(%arg0: !hal.buffer_view) -> (index, index) {
+util.func public @buffer_view_shape_queries(%arg0: !hal.buffer_view) -> (index, index) {
// CHECK: %{{.+}} = hal.buffer_view.rank<%arg0 : !hal.buffer_view> : index
%0 = hal.buffer_view.rank<%arg0 : !hal.buffer_view> : index
// CHECK: %{{.+}} = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
- return %0, %1 : index, index
+ util.return %0, %1 : index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/channel_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/channel_ops.mlir
index 5a86fb6..c86ef92 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/channel_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/channel_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @channel_create
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64, %[[ID:.+]]: !util.buffer, %[[GROUP:.+]]: !util.buffer, %[[RANK:.+]]: i32, %[[COUNT:.+]]: i32)
-func.func @channel_create(%device: !hal.device, %affinity: i64, %id: !util.buffer, %group: !util.buffer, %rank: i32, %count: i32) {
+util.func public @channel_create(%device: !hal.device, %affinity: i64, %id: !util.buffer, %group: !util.buffer, %rank: i32, %count: i32) {
// CHECK: %channel = hal.channel.create
// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
// CHECK-SAME: affinity(%[[AFFINITY]])
@@ -18,14 +18,14 @@
group(%group)
rank(%rank)
count(%count) : !hal.channel
- return
+ util.return
}
// -----
// CHECK-LABEL: @channel_split
// CHECK-SAME: (%[[BASE_CHANNEL:.+]]: !hal.channel, %[[COLOR:.+]]: i32, %[[KEY:.+]]: i32)
-func.func @channel_split(%base_channel: !hal.channel, %color: i32, %key: i32) {
+util.func public @channel_split(%base_channel: !hal.channel, %color: i32, %key: i32) {
// CHECK: %channel = hal.channel.split<%[[BASE_CHANNEL]] : !hal.channel>
// CHECK-SAME: color(%[[COLOR]])
// CHECK-SAME: key(%[[KEY]])
@@ -34,15 +34,15 @@
color(%color)
key(%key)
flags(0) : !hal.channel
- return
+ util.return
}
// -----
// CHECK-LABEL: @channel_rank_and_count
// CHECK-SAME: (%[[CHANNEL:.+]]: !hal.channel)
-func.func @channel_rank_and_count(%channel: !hal.channel) -> (i32, i32) {
+util.func public @channel_rank_and_count(%channel: !hal.channel) -> (i32, i32) {
// CHECK: = hal.channel.rank_and_count<%[[CHANNEL]] : !hal.channel> : i32, i32
%rank, %count = hal.channel.rank_and_count<%channel : !hal.channel> : i32, i32
- return %rank, %count : i32, i32
+ util.return %rank, %count : i32, i32
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
index 3e1b94d..5ced86a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @skip_command_buffer_device
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-func.func @skip_command_buffer_device(%device: !hal.device) -> !hal.executable {
+util.func public @skip_command_buffer_device(%device: !hal.device) -> !hal.executable {
%cmd = hal.command_buffer.create device(%device : !hal.device)
mode(OneShot)
categories("Transfer|Dispatch") : !hal.command_buffer
@@ -14,7 +14,7 @@
%exe = hal.executable.lookup device(%device2 : !hal.device)
executable(@executable_name) : !hal.executable
- return %exe : !hal.executable
+ util.return %exe : !hal.executable
}
// -----
@@ -22,7 +22,7 @@
// CHECK-LABEL: @fold_buffer_subspan_into_fill_buffer
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer
-func.func @fold_buffer_subspan_into_fill_buffer(
+util.func public @fold_buffer_subspan_into_fill_buffer(
%cmd: !hal.command_buffer,
%buffer: !hal.buffer
) {
@@ -37,7 +37,7 @@
// CHECK-SAME: target(%[[BASE_BUFFER]] : !hal.buffer)[%c108192, %c8192]
target(%target_subspan : !hal.buffer)[%c100000, %c8192]
pattern(%c1234_i32 : i32)
- return
+ util.return
}
// -----
@@ -45,7 +45,7 @@
// CHECK-LABEL: @fold_buffer_subspan_into_copy_buffer
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer
-func.func @fold_buffer_subspan_into_copy_buffer(
+util.func public @fold_buffer_subspan_into_copy_buffer(
%cmd: !hal.command_buffer,
%buffer: !hal.buffer
) {
@@ -63,7 +63,7 @@
// CHECK-SAME: target(%[[BASE_BUFFER]] : !hal.buffer)[%c108192]
target(%target_subspan : !hal.buffer)[%c100000]
length(%c8192)
- return
+ util.return
}
// -----
@@ -72,7 +72,7 @@
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout,
// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer
-func.func @fold_buffer_subspan_into_push_descriptor_set(
+util.func public @fold_buffer_subspan_into_push_descriptor_set(
%cmd: !hal.command_buffer,
%layout: !hal.pipeline_layout,
%buffer: !hal.buffer
@@ -101,5 +101,5 @@
// CHECK-NEXT: %c2 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c262144]
%c2 = (%buffer : !hal.buffer)[%c4096, %c262144]
])
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
index 66026aa..cf9f38e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @command_buffer_create
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-func.func @command_buffer_create(%device: !hal.device) {
+util.func public @command_buffer_create(%device: !hal.device) {
// CHECK: %cmd = hal.command_buffer.create
// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
// CHECK-SAME: mode(OneShot)
@@ -10,34 +10,34 @@
%cmd = hal.command_buffer.create device(%device : !hal.device)
mode(OneShot)
categories("Transfer|Dispatch") : !hal.command_buffer
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_finalize
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer)
-func.func @command_buffer_finalize(%cmd: !hal.command_buffer) {
+util.func public @command_buffer_finalize(%cmd: !hal.command_buffer) {
// CHECK: hal.command_buffer.finalize<%[[CMD]] : !hal.command_buffer>
hal.command_buffer.finalize<%cmd : !hal.command_buffer>
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_device
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer)
-func.func @command_buffer_device(%cmd: !hal.command_buffer) {
+util.func public @command_buffer_device(%cmd: !hal.command_buffer) {
// CHECK: %0 = hal.command_buffer.device<%[[CMD]] : !hal.command_buffer> : !hal.device
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device
- return
+ util.return
}
// -----
// CHECK-LABEL: @command_buffer_execution_barrier
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer)
-func.func @command_buffer_execution_barrier(%cmd: !hal.command_buffer) {
+util.func public @command_buffer_execution_barrier(%cmd: !hal.command_buffer) {
// CHECK: hal.command_buffer.execution_barrier<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: source(CommandIssue)
// CHECK-SAME: target(CommandProcess)
@@ -46,7 +46,7 @@
source(CommandIssue)
target(CommandProcess)
flags(None)
- return
+ util.return
}
// -----
@@ -56,7 +56,7 @@
// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index,
// CHECK-SAME: %[[PATTERN:.+]]: i32)
-func.func @command_buffer_fill_buffer(
+util.func public @command_buffer_fill_buffer(
%cmd: !hal.command_buffer,
%buffer: !hal.buffer,
%offset: index,
@@ -68,7 +68,7 @@
hal.command_buffer.fill_buffer<%cmd : !hal.command_buffer>
target(%buffer : !hal.buffer)[%offset, %length]
pattern(%pattern : i32)
- return
+ util.return
}
// -----
@@ -78,7 +78,7 @@
// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[SRC_OFFSET:.+]]: index, %[[DST_OFFSET:.+]]: index,
// CHECK-SAME: %[[LENGTH:.+]]: index)
-func.func @command_buffer_copy_buffer(
+util.func public @command_buffer_copy_buffer(
%cmd: !hal.command_buffer,
%buffer: !hal.buffer,
%src_offset: index,
@@ -93,7 +93,7 @@
source(%buffer : !hal.buffer)[%src_offset]
target(%buffer : !hal.buffer)[%dst_offset]
length(%length)
- return
+ util.return
}
// -----
@@ -104,7 +104,7 @@
// CHECK-SAME: %[[PARAM:.+]]: i32,
// CHECK-SAME: %[[SEND_BUFFER:.+]]: !hal.buffer, %[[RECV_BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[COUNT:.+]]: index)
-func.func @command_buffer_collective(
+util.func public @command_buffer_collective(
%cmd: !hal.command_buffer,
%channel: !hal.channel,
%param: i32,
@@ -154,7 +154,7 @@
recv(%recv_buffer : !hal.buffer)[%c20, %c128]
count(%count)
- return
+ util.return
}
// -----
@@ -164,7 +164,7 @@
// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout,
// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[SLOT:.+]]: index
-func.func @command_buffer_push_descriptor_set(
+util.func public @command_buffer_push_descriptor_set(
%cmd: !hal.command_buffer,
%layout: !hal.pipeline_layout,
%buffer: !hal.buffer,
@@ -185,7 +185,7 @@
// CHECK-NEXT: %c1 = (%[[SLOT]] : index)[%c4, %c4096]
%c1 = (%slot : index)[%c4, %c4096]
])
- return
+ util.return
}
// -----
@@ -204,7 +204,7 @@
// CHECK-LABEL: @command_buffer_dispatch
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index)
-func.func @command_buffer_dispatch(
+util.func public @command_buffer_dispatch(
%cmd: !hal.command_buffer,
%x: index,
%y: index,
@@ -215,7 +215,7 @@
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer>
target(@ex::@backend::@entry0)
workgroups([%x, %y, %z])
- return
+ util.return
}
// -----
@@ -235,7 +235,7 @@
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[OFFSET:.+]]: index)
-func.func @command_buffer_dispatch_indirect(
+util.func public @command_buffer_dispatch_indirect(
%cmd: !hal.command_buffer,
%buffer: !hal.buffer,
%offset: index) {
@@ -245,5 +245,5 @@
hal.command_buffer.dispatch.indirect.symbol<%cmd : !hal.command_buffer>
target(@ex::@backend::@entry0)
workgroups(%buffer : !hal.buffer)[%offset]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir
index c50ed5d..86180ac 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @descriptor_set_layout_create
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-func.func @descriptor_set_layout_create(%device: !hal.device) {
+util.func public @descriptor_set_layout_create(%device: !hal.device) {
// CHECK: = hal.descriptor_set_layout.create
// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
// CHECK-SAME: flags("None")
@@ -16,5 +16,5 @@
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<1, storage_buffer>
]) : !hal.descriptor_set_layout
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_folding.mlir
index 878faab..a27d57a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_folding.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @ImmediatelyResolveDeviceQueueBarrier
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[SIGNAL_FENCE:.+]]: !hal.fence)
-func.func @ImmediatelyResolveDeviceQueueBarrier(%device: !hal.device, %signal_fence: !hal.fence) {
+util.func public @ImmediatelyResolveDeviceQueueBarrier(%device: !hal.device, %signal_fence: !hal.fence) {
%c-1_i64 = arith.constant -1 : i64
// CHECK-NOT: util.null
%wait_fence = util.null : !hal.fence
@@ -12,27 +12,27 @@
affinity(%c-1_i64)
wait(%wait_fence)
signal(%signal_fence)
- return
+ util.return
}
// -----
// CHECK-LABEL: @HoistDeviceQueueBarrierChain
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[SIGNAL_FENCE:.+]]: !hal.fence)
-func.func @HoistDeviceQueueBarrierChain(%device: !hal.device, %signal_fence: !hal.fence) {
+util.func public @HoistDeviceQueueBarrierChain(%device: !hal.device, %signal_fence: !hal.fence) {
%c-1_i64 = arith.constant -1 : i64
// CHECK-NOT: hal.fence.create
%temp_fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
- // CHECK: call @external_async_fn(%[[SIGNAL_FENCE]])
- call @external_async_fn(%temp_fence) : (!hal.fence) -> ()
+ // CHECK: util.call @external_async_fn(%[[SIGNAL_FENCE]])
+ util.call @external_async_fn(%temp_fence) : (!hal.fence) -> ()
// CHECK-NOT: hal.device.queue.execute
hal.device.queue.execute<%device : !hal.device>
affinity(%c-1_i64)
wait(%temp_fence)
signal(%signal_fence)
- return
+ util.return
}
-func.func private @external_async_fn(!hal.fence)
+util.func private @external_async_fn(!hal.fence)
// -----
@@ -41,7 +41,7 @@
// CHECK-LABEL: @HoistDeviceQueueBarrierChainOutOfOrder
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[CMD:.+]]: !hal.command_buffer, %[[WAIT_FENCE:.+]]: !hal.fence)
-func.func @HoistDeviceQueueBarrierChainOutOfOrder(%device: !hal.device, %cmd: !hal.command_buffer, %wait_fence: !hal.fence) -> !hal.fence {
+util.func public @HoistDeviceQueueBarrierChainOutOfOrder(%device: !hal.device, %cmd: !hal.command_buffer, %wait_fence: !hal.fence) -> !hal.fence {
%c-1_i64 = arith.constant -1 : i64
// CHECK: %[[FENCE1:.+]] = hal.fence.create {{.+}} {test.fence1}
%fence0 = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence attributes {test.fence0}
@@ -58,8 +58,8 @@
affinity(%c-1_i64)
wait(%fence0)
signal(%fence1)
- // CHECK: return %[[FENCE1]]
- return %fence1 : !hal.fence
+ // CHECK: util.return %[[FENCE1]]
+ util.return %fence1 : !hal.fence
}
// -----
@@ -69,7 +69,7 @@
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence,
// CHECK-SAME: %[[SIGNAL_FENCE:.+]]: !hal.fence)
-func.func @ElideDeviceQueueBarrierOp(
+util.func public @ElideDeviceQueueBarrierOp(
%device: !hal.device,
%cmd: !hal.command_buffer,
%wait_fence: !hal.fence,
@@ -112,6 +112,6 @@
wait(%fence1)
signal(%signal_fence)
- // CHECK-NEXT: return
- return
+ // CHECK-NEXT: util.return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_ops.mlir
index a04e5db..206c3bb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/device_ops.mlir
@@ -2,26 +2,26 @@
// CHECK-LABEL: @device_allocator
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-func.func @device_allocator(%device: !hal.device) -> !hal.allocator {
+util.func public @device_allocator(%device: !hal.device) -> !hal.allocator {
// CHECK: %allocator = hal.device.allocator<%[[DEVICE]] : !hal.device> : !hal.allocator
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
- return %allocator : !hal.allocator
+ util.return %allocator : !hal.allocator
}
// -----
// CHECK-LABEL: @device_query
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-func.func @device_query(%device : !hal.device) -> (i1, i32) {
+util.func public @device_query(%device : !hal.device) -> (i1, i32) {
// CHECK: = hal.device.query<%[[DEVICE]] : !hal.device> key("sys" :: "foo") : i1, i32
%ok, %value = hal.device.query<%device : !hal.device> key("sys" :: "foo") : i1, i32
- return %ok, %value : i1, i32
+ util.return %ok, %value : i1, i32
}
// -----
// CHECK-LABEL: @device_queue_alloca
-func.func @device_queue_alloca(
+util.func public @device_queue_alloca(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence, %[[SIGNAL_FENCE:.+]]: !hal.fence,
@@ -41,13 +41,13 @@
type(DeviceLocal) usage(Transfer)
// CHECK-SAME: : !hal.buffer{%[[SIZE]]}
: !hal.buffer{%size}
- return %buffer : !hal.buffer
+ util.return %buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @device_queue_dealloca
-func.func @device_queue_dealloca(
+util.func public @device_queue_dealloca(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence, %[[SIGNAL_FENCE:.+]]: !hal.fence,
@@ -62,13 +62,13 @@
wait(%wait_fence) signal(%signal_fence)
// CHECK-SAME: buffer(%[[BUFFER]] : !hal.buffer)
buffer(%buffer : !hal.buffer)
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_read
-func.func @device_queue_read(
+util.func public @device_queue_read(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence, %[[SIGNAL_FENCE:.+]]: !hal.fence,
@@ -97,13 +97,13 @@
length(%length)
// CHECK-SAME: flags(0)
flags(0)
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_write
-func.func @device_queue_write(
+util.func public @device_queue_write(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence, %[[SIGNAL_FENCE:.+]]: !hal.fence,
@@ -132,13 +132,13 @@
length(%length)
// CHECK-SAME: flags(0)
flags(0)
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_execute
-func.func @device_queue_execute(
+util.func public @device_queue_execute(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !hal.fence, %[[SIGNAL_FENCE:.+]]: !hal.fence,
@@ -153,18 +153,18 @@
wait(%wait_fence) signal(%signal_fence)
// CHECK-SAME: commands([%[[CMD0]], %[[CMD1]]])
commands([%cmd0, %cmd1])
- return
+ util.return
}
// -----
// CHECK-LABEL: @device_queue_flush
-func.func @device_queue_flush(
+util.func public @device_queue_flush(
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[AFFINITY:.+]]: i64)
%device: !hal.device, %affinity: i64) {
// CHECK: hal.device.queue.flush<%[[DEVICE]] : !hal.device>
hal.device.queue.flush<%device : !hal.device>
// CHECK-SAME: affinity(%[[AFFINITY]])
affinity(%affinity)
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/devices_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/devices_ops.mlir
index 633318b..0283400 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/devices_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/devices_ops.mlir
@@ -1,18 +1,18 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @devices_count
-func.func @devices_count() -> index {
+util.func public @devices_count() -> index {
// CHECK: = hal.devices.count : index
%device_count = hal.devices.count : index
- return %device_count : index
+ util.return %device_count : index
}
// -----
// CHECK-LABEL: @devices_get
// CHECK-SAME: (%[[INDEX:.+]]: index)
-func.func @devices_get(%index: index) -> !hal.device {
+util.func public @devices_get(%index: index) -> !hal.device {
// CHECK: = hal.devices.get %[[INDEX]] : !hal.device
%device = hal.devices.get %index : !hal.device
- return %device : !hal.device
+ util.return %device : !hal.device
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
index acd872d..6da4866 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
@@ -76,7 +76,7 @@
hal.executable.condition(%device: !hal.device) -> i1 {
// CHECK-NEXT: %[[OK:.+]], %[[VALUE:.+]] = hal.device.query<%[[DEVICE]]
%ok, %value = hal.device.query<%device : !hal.device> key("some" :: "value") : i1, i32
- // CHECK-NEXT: return %[[OK]]
+ // CHECK-NEXT: hal.return %[[OK]]
hal.return %ok : i1
}
@@ -144,7 +144,7 @@
// CHECK-SAME: %[[DEVICE:.+]]: !hal.device,
// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.pipeline_layout,
// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.pipeline_layout
-func.func @executable_create(%device: !hal.device,
+util.func public @executable_create(%device: !hal.device,
%layout0: !hal.pipeline_layout,
%layout1: !hal.pipeline_layout) {
// CHECK: = hal.executable.create
@@ -154,7 +154,7 @@
%0 = hal.executable.create device(%device : !hal.device)
target(@exe::@binary1)
layouts([%layout0, %layout1]) : !hal.executable
- return
+ util.return
}
// -----
@@ -163,7 +163,7 @@
// CHECK-SAME: %[[DEVICE:.+]]: !hal.device,
// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.descriptor_set_layout,
// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.descriptor_set_layout
-func.func @pipeline_layout_create(%device: !hal.device,
+util.func public @pipeline_layout_create(%device: !hal.device,
%layout0: !hal.descriptor_set_layout,
%layout1: !hal.descriptor_set_layout) {
// CHECK: hal.pipeline_layout.create
@@ -173,7 +173,7 @@
%0 = hal.pipeline_layout.create device(%device : !hal.device)
push_constants(1)
layouts([%layout0, %layout1]) : !hal.pipeline_layout
- return
+ util.return
}
// -----
@@ -197,7 +197,7 @@
// CHECK-LABEL: @unresolved_workload
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device,
// CHECK-SAME: %[[WORKLOAD_0:.+]]: index, %[[WORKLOAD_1:.+]]: index)
-func.func @unresolved_workload(%device: !hal.device,
+util.func public @unresolved_workload(%device: !hal.device,
%workload_0: index, %workload_1: index) -> (index, index, index) {
// CHECK: %[[WORKGROUP_X:.+]], %[[WORKGROUP_Y:.+]], %[[WORKGROUP_Z:.+]] =
// CHECK-SAME: hal.executable.calculate_workgroups
@@ -208,6 +208,6 @@
device(%device : !hal.device)
target(@unresolved_workload_ex::@backend::@entry0)
workload([%workload_0, %workload_1]) : index, index, index
- // CHECK: return %[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]
- return %workgroups#0, %workgroups#1, %workgroups#2 : index, index, index
+ // CHECK: util.return %[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]
+ util.return %workgroups#0, %workgroups#1, %workgroups#2 : index, index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
index 3c8fd84..2d401c5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/experimental_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @file_from_memory
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[BUFFER:.+]]: !util.buffer)
-func.func @file_from_memory(%device: !hal.device, %buffer: !util.buffer) -> !hal.file {
+util.func public @file_from_memory(%device: !hal.device, %buffer: !util.buffer) -> !hal.file {
// CHECK-DAG: %[[AFFINITY:.+]] = arith.constant -1
%affinity = arith.constant -1 : i64
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 100
@@ -24,5 +24,5 @@
access(Read)
buffer(%buffer : !util.buffer)[%offset for %length]
flags(%flags) : !hal.file
- return %file : !hal.file
+ util.return %file : !hal.file
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_folding.mlir
index 2127d89..0bbd190 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_folding.mlir
@@ -5,10 +5,10 @@
// the program to simplify submissions.
// CHECK-LABEL: @fence_create_unused
-func.func @fence_create_unused(%device: !hal.device) {
+util.func public @fence_create_unused(%device: !hal.device) {
// CHECK-NOT: hal.fence.create
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
- return
+ util.return
}
// -----
@@ -17,10 +17,10 @@
// CHECK-LABEL: @fence_join_one
// CHECK-SAME: %[[ARG:.+]]: !hal.fence
-func.func @fence_join_one(%arg: !hal.fence) -> !hal.fence {
+util.func public @fence_join_one(%arg: !hal.fence) -> !hal.fence {
%join = hal.fence.join at([%arg]) -> !hal.fence
- // CHECK: return %[[ARG]]
- return %join : !hal.fence
+ // CHECK: util.return %[[ARG]]
+ util.return %join : !hal.fence
}
// -----
@@ -28,11 +28,11 @@
// Tests that a fence join with no operands folds into a util.null.
// CHECK-LABEL: @fence_join_empty
-func.func @fence_join_empty() -> !hal.fence {
+util.func public @fence_join_empty() -> !hal.fence {
// CHECK: %[[JOIN:.+]] = util.null : !hal.fence
%join = hal.fence.join at([]) -> !hal.fence
- // CHECK: return %[[JOIN]]
- return %join : !hal.fence
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !hal.fence
}
// -----
@@ -41,13 +41,13 @@
// CHECK-LABEL: @fence_join_null
// CHECK-SAME: (%[[ARG0:.+]]: !hal.fence, %[[ARG1:.+]]: !hal.fence)
-func.func @fence_join_null(%arg0: !hal.fence, %arg1: !hal.fence) -> !hal.fence {
+util.func public @fence_join_null(%arg0: !hal.fence, %arg1: !hal.fence) -> !hal.fence {
// CHECK-NOT: util.null
%null = util.null : !hal.fence
// CHECK: %[[JOIN:.+]] = hal.fence.join at([%[[ARG0]], %[[ARG1]]]) -> !hal.fence
%join = hal.fence.join at([%arg0, %null, %arg1]) -> !hal.fence
- // CHECK: return %[[JOIN]]
- return %join : !hal.fence
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !hal.fence
}
// -----
@@ -56,11 +56,11 @@
// CHECK-LABEL: @fence_join_duplicate_fences
// CHECK-SAME: %[[FENCE0:.+]]: !hal.fence, %[[FENCE1:.+]]: !hal.fence
-func.func @fence_join_duplicate_fences(%fence0: !hal.fence, %fence1: !hal.fence) -> !hal.fence {
+util.func public @fence_join_duplicate_fences(%fence0: !hal.fence, %fence1: !hal.fence) -> !hal.fence {
// CHECK: %[[JOIN:.+]] = hal.fence.join at([%[[FENCE0]], %[[FENCE1]]]) -> !hal.fence
%join = hal.fence.join at([%fence0, %fence1, %fence0]) -> !hal.fence
- // CHECK: return %[[JOIN]]
- return %join : !hal.fence
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !hal.fence
}
// -----
@@ -70,19 +70,19 @@
// is created and the time it is signaled.
// CHECK-LABEL: @fence_elide_signaled
-func.func @fence_elide_signaled(%device: !hal.device) -> !hal.fence {
+util.func public @fence_elide_signaled(%device: !hal.device) -> !hal.fence {
// CHECK-NOT: hal.fence.create
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
// Ok to have other things inbetween so long as they don't touch the fence.
- // CHECK: call @external_nop_call
- call @external_nop_call() : () -> ()
+ // CHECK: util.call @external_nop_call
+ util.call @external_nop_call() : () -> ()
// CHECK-NOT: hal.fence.signal
hal.fence.signal<%fence : !hal.fence>
// CHECK: %[[FENCE:.+]] = util.null : !hal.fence
- // CHECK: return %[[FENCE]]
- return %fence : !hal.fence
+ // CHECK: util.return %[[FENCE]]
+ util.return %fence : !hal.fence
}
-func.func private @external_nop_call()
+util.func private @external_nop_call()
// -----
@@ -90,30 +90,30 @@
// on between when it is created and when it is signaled.
// CHECK-LABEL: @fence_cannot_elide_signaled
-func.func @fence_cannot_elide_signaled(%device: !hal.device) -> !hal.fence {
+util.func public @fence_cannot_elide_signaled(%device: !hal.device) -> !hal.fence {
// CHECK: hal.fence.create
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
// Block the elision as the external call may wait on the fence.
- // CHECK: call @external_wait_call
- call @external_wait_call(%fence) : (!hal.fence) -> ()
+ // CHECK: util.call @external_wait_call
+ util.call @external_wait_call(%fence) : (!hal.fence) -> ()
// CHECK: hal.fence.signal
hal.fence.signal<%fence : !hal.fence>
- // CHECK: return
- return %fence : !hal.fence
+ // CHECK: util.return
+ util.return %fence : !hal.fence
}
-func.func private @external_wait_call(!hal.fence)
+util.func private @external_wait_call(!hal.fence)
// -----
// Tests that awaits with no fences are elided.
// CHECK-LABEL: @fence_await_none
-func.func @fence_await_none() -> i32 {
+util.func public @fence_await_none() -> i32 {
%timeout = arith.constant 123 : i32
// CHECK: %[[STATUS:.+]] = arith.constant 0 : i32
%status = hal.fence.await until([]) timeout_millis(%timeout) : i32
- // CHECK: return %[[STATUS]]
- return %status : i32
+ // CHECK: util.return %[[STATUS]]
+ util.return %status : i32
}
// -----
@@ -122,14 +122,14 @@
// CHECK-LABEL: @fence_await_null
// CHECK-SAME: %[[ARG:.+]]: !hal.fence
-func.func @fence_await_null(%arg: !hal.fence) -> i32 {
+util.func public @fence_await_null(%arg: !hal.fence) -> i32 {
%timeout = arith.constant 123 : i32
// CHECK-NOT: util.null
%null = util.null : !hal.fence
// CHECK: %[[STATUS:.+]] = hal.fence.await until([%[[ARG]]])
%status = hal.fence.await until([%arg, %null]) timeout_millis(%timeout) : i32
- // CHECK: return %[[STATUS]]
- return %status : i32
+ // CHECK: util.return %[[STATUS]]
+ util.return %status : i32
}
// -----
@@ -138,10 +138,10 @@
// CHECK-LABEL: @fence_await_duplicate_fences
// CHECK-SAME: %[[FENCE0:.+]]: !hal.fence, %[[FENCE1:.+]]: !hal.fence
-func.func @fence_await_duplicate_fences(%fence0: !hal.fence, %fence1: !hal.fence) -> i32 {
+util.func public @fence_await_duplicate_fences(%fence0: !hal.fence, %fence1: !hal.fence) -> i32 {
%timeout = arith.constant 123 : i32
// CHECK: %[[STATUS:.+]] = hal.fence.await until([%[[FENCE0]], %[[FENCE1]]])
%status = hal.fence.await until([%fence0, %fence1, %fence0]) timeout_millis(%timeout) : i32
- // CHECK: return %[[STATUS]]
- return %status : i32
+ // CHECK: util.return %[[STATUS]]
+ util.return %status : i32
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_ops.mlir
index 65f7d60..59a42cf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/fence_ops.mlir
@@ -1,53 +1,53 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @fence_create
-func.func @fence_create(%arg0: !hal.device) -> !hal.fence {
+util.func public @fence_create(%arg0: !hal.device) -> !hal.fence {
// CHECK: = hal.fence.create device(%arg0 : !hal.device) flags("None") : !hal.fence
%fence = hal.fence.create device(%arg0 : !hal.device) flags("None") : !hal.fence
- return %fence : !hal.fence
+ util.return %fence : !hal.fence
}
// -----
// CHECK-LABEL: @fence_join
-func.func @fence_join(%arg0: !hal.fence, %arg1: !hal.fence) -> !hal.fence {
+util.func public @fence_join(%arg0: !hal.fence, %arg1: !hal.fence) -> !hal.fence {
// CHECK: = hal.fence.join at([%arg0, %arg1]) -> !hal.fence
%fence = hal.fence.join at([%arg0, %arg1]) -> !hal.fence
- return %fence : !hal.fence
+ util.return %fence : !hal.fence
}
// -----
// CHECK-LABEL: @fence_query
-func.func @fence_query(%arg0: !hal.fence) -> i32 {
+util.func public @fence_query(%arg0: !hal.fence) -> i32 {
// CHECK: = hal.fence.query<%arg0 : !hal.fence> : i32
%status = hal.fence.query<%arg0 : !hal.fence> : i32
- return %status : i32
+ util.return %status : i32
}
// -----
// CHECK-LABEL: @fence_signal
-func.func @fence_signal(%arg0: !hal.fence) {
+util.func public @fence_signal(%arg0: !hal.fence) {
// CHECK: hal.fence.signal<%arg0 : !hal.fence>
hal.fence.signal<%arg0 : !hal.fence>
- return
+ util.return
}
// -----
// CHECK-LABEL: @fence_fail
-func.func @fence_fail(%arg0: !hal.fence, %arg1: i32) {
+util.func public @fence_fail(%arg0: !hal.fence, %arg1: i32) {
// CHECK: hal.fence.fail<%arg0 : !hal.fence> status(%arg1)
hal.fence.fail<%arg0 : !hal.fence> status(%arg1)
- return
+ util.return
}
// -----
// CHECK-LABEL: @fence_await
-func.func @fence_await(%arg0: !hal.fence, %arg1: !hal.fence, %arg2: i32) -> i32 {
+util.func public @fence_await(%arg0: !hal.fence, %arg1: !hal.fence, %arg2: i32) -> i32 {
// CHECK: = hal.fence.await until([%arg0, %arg1]) timeout_millis(%arg2) : i32
%status = hal.fence.await until([%arg0, %arg1]) timeout_millis(%arg2) : i32
- return %status : i32
+ util.return %status : i32
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir
index 742128a..80cea09 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/invalid.mlir
@@ -1,20 +1,20 @@
// RUN: iree-opt --split-input-file --verify-diagnostics %s
util.global mutable @var : !hal.buffer
-func.func @fn(%arg0: !hal.buffer_view) {
+util.func public @fn(%arg0: !hal.buffer_view) {
// expected-error @+1 {{global "var" is '!hal.buffer' but store is '!hal.buffer_view'}}
util.global.store %arg0, @var : !hal.buffer_view
- return
+ util.return
}
// -----
util.global mutable @var : !hal.buffer
-func.func @fn(%arg0: !hal.buffer_view) {
+util.func public @fn(%arg0: !hal.buffer_view) {
%0 = util.global.address @var : !util.ptr<!hal.buffer>
// expected-error @+1 {{global pointer is '!hal.buffer' but store is '!hal.buffer_view'}}
util.global.store.indirect %arg0, %0 : !hal.buffer_view -> !util.ptr<!hal.buffer>
- return
+ util.return
}
// -----
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_op_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_op_folding.mlir
index f7f295d..131b439 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_op_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_op_folding.mlir
@@ -1,13 +1,13 @@
// RUN: iree-opt --split-input-file --canonicalize -cse %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @foldTensorImportExport
-func.func @foldTensorImportExport(%arg0: !hal.buffer_view) -> !hal.buffer_view {
+util.func public @foldTensorImportExport(%arg0: !hal.buffer_view) -> !hal.buffer_view {
// CHECK-NOT: hal.tensor.import
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<5xi32>
// CHECK-NOT: hal.tensor.export
%1 = hal.tensor.export %0 : tensor<5xi32> -> !hal.buffer_view
- // CHECK: return %arg0 : !hal.buffer_view
- return %1 : !hal.buffer_view
+ // CHECK: util.return %arg0 : !hal.buffer_view
+ util.return %1 : !hal.buffer_view
}
// -----
@@ -18,33 +18,33 @@
// For now we just don't fold.
// CHECK-LABEL: @foldTensorImportExportTypeMismatch
-func.func @foldTensorImportExportTypeMismatch(%arg0: !hal.buffer_view) -> !hal.buffer {
+util.func public @foldTensorImportExportTypeMismatch(%arg0: !hal.buffer_view) -> !hal.buffer {
// CHECK: hal.tensor.import
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<5xi32>
// CHECK: hal.tensor.export
%1 = hal.tensor.export %0 : tensor<5xi32> -> !hal.buffer
- return %1 : !hal.buffer
+ util.return %1 : !hal.buffer
}
// -----
// CHECK-LABEL: @foldTensorExportImport
-func.func @foldTensorExportImport(%arg0: tensor<5xi32>) -> tensor<5xi32> {
+util.func public @foldTensorExportImport(%arg0: tensor<5xi32>) -> tensor<5xi32> {
// CHECK-NOT: hal.tensor.export
%0 = hal.tensor.export %arg0 : tensor<5xi32> -> !hal.buffer_view
// CHECK-NOT: hal.tensor.import
%1 = hal.tensor.import %0 : !hal.buffer_view -> tensor<5xi32>
- // CHECK: return %arg0 : tensor<5xi32>
- return %1 : tensor<5xi32>
+ // CHECK: util.return %arg0 : tensor<5xi32>
+ util.return %1 : tensor<5xi32>
}
// -----
// CHECK-LABEL: @DeduplicateTensorBarrierSources
// CHECK-SAME: (%[[ARG0:.+]]: tensor<5xi32>, %[[ARG1:.+]]: tensor<6xi32>, %[[FENCE:.+]]: !hal.fence)
-func.func @DeduplicateTensorBarrierSources(%arg0: tensor<5xi32>, %arg1: tensor<6xi32>, %fence: !hal.fence) -> (tensor<5xi32>, tensor<6xi32>, tensor<5xi32>) {
+util.func public @DeduplicateTensorBarrierSources(%arg0: tensor<5xi32>, %arg1: tensor<6xi32>, %fence: !hal.fence) -> (tensor<5xi32>, tensor<6xi32>, tensor<5xi32>) {
// CHECK: %[[RESULTS:.+]]:2 = hal.tensor.barrier join(%[[ARG0]], %[[ARG1]] : tensor<5xi32>, tensor<6xi32>) => %[[FENCE]] : !hal.fence
%0:3 = hal.tensor.barrier join(%arg0, %arg1, %arg0 : tensor<5xi32>, tensor<6xi32>, tensor<5xi32>) => %fence : !hal.fence
- // CHECK: return %[[RESULTS]]#0, %[[RESULTS]]#1, %[[RESULTS]]#0 : tensor<5xi32>, tensor<6xi32>, tensor<5xi32>
- return %0#0, %0#1, %0#2 : tensor<5xi32>, tensor<6xi32>, tensor<5xi32>
+ // CHECK: util.return %[[RESULTS]]#0, %[[RESULTS]]#1, %[[RESULTS]]#0 : tensor<5xi32>, tensor<6xi32>, tensor<5xi32>
+ util.return %0#0, %0#1, %0#2 : tensor<5xi32>, tensor<6xi32>, tensor<5xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
index 45e1c6f..bcee3c9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
@@ -1,55 +1,55 @@
// RUN: iree-opt --split-input-file --mlir-print-local-scope %s | iree-opt --split-input-file --mlir-print-local-scope | FileCheck %s
// CHECK-LABEL: @tensorImportStatic
-func.func @tensorImportStatic(%arg0: !hal.buffer_view) -> tensor<5xi32> {
+util.func public @tensorImportStatic(%arg0: !hal.buffer_view) -> tensor<5xi32> {
// CHECK: hal.tensor.import %arg0 "hello" : !hal.buffer_view -> tensor<5xi32>
%0 = hal.tensor.import %arg0 "hello" : !hal.buffer_view -> tensor<5xi32>
- return %0 : tensor<5xi32>
+ util.return %0 : tensor<5xi32>
}
// -----
// CHECK-LABEL: @tensorImportDynamic
-func.func @tensorImportDynamic(%arg0: !hal.buffer_view, %arg1: index) -> tensor<?x3xi32> {
+util.func public @tensorImportDynamic(%arg0: !hal.buffer_view, %arg1: index) -> tensor<?x3xi32> {
// CHECK: hal.tensor.import %arg0 : !hal.buffer_view -> tensor<?x3xf32> as tensor<?x3xi32>{%arg1}
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<?x3xf32> as tensor<?x3xi32>{%arg1}
- return %0 : tensor<?x3xi32>
+ util.return %0 : tensor<?x3xi32>
}
// -----
// CHECK-LABEL: @tensorImportAsync
-func.func @tensorImportAsync(%arg0: !hal.buffer_view, %arg1: !hal.fence) -> tensor<5xi32> {
+util.func public @tensorImportAsync(%arg0: !hal.buffer_view, %arg1: !hal.fence) -> tensor<5xi32> {
// CHECK: hal.tensor.import wait(%arg1) => %arg0 : !hal.buffer_view -> tensor<5xi32>
%0 = hal.tensor.import wait(%arg1) => %arg0 : !hal.buffer_view -> tensor<5xi32>
- return %0 : tensor<5xi32>
+ util.return %0 : tensor<5xi32>
}
// -----
// CHECK-LABEL: @tensorExportDynamic
-func.func @tensorExportDynamic(%arg0: tensor<?x3xi32>, %arg1: index) -> !hal.buffer_view {
+util.func public @tensorExportDynamic(%arg0: tensor<?x3xi32>, %arg1: index) -> !hal.buffer_view {
// CHECK: hal.tensor.export %arg0 "goodbye" : tensor<?x3xf32> as tensor<?x3xi32>{%arg1} -> !hal.buffer_view
%0 = hal.tensor.export %arg0 "goodbye" : tensor<?x3xf32> as tensor<?x3xi32>{%arg1} -> !hal.buffer_view
- return %0 : !hal.buffer_view
+ util.return %0 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @tensorExportInPlace
-func.func @tensorExportInPlace(%arg0: tensor<?x3xi32>, %arg1: index, %arg2: !hal.buffer) -> !hal.buffer_view {
+util.func public @tensorExportInPlace(%arg0: tensor<?x3xi32>, %arg1: index, %arg2: !hal.buffer) -> !hal.buffer_view {
// CHECK: hal.tensor.export %arg0 into(%arg2 : !hal.buffer) : tensor<?x3xf32> as tensor<?x3xi32>{%arg1} -> !hal.buffer_view
%0 = hal.tensor.export %arg0 into(%arg2 : !hal.buffer) : tensor<?x3xf32> as tensor<?x3xi32>{%arg1} -> !hal.buffer_view
- return %0 : !hal.buffer_view
+ util.return %0 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @tensorBarrier
-func.func @tensorBarrier(%arg0: tensor<3xf32>, %arg1: tensor<4xf32>, %arg2: !hal.fence) -> (tensor<3xf32>, tensor<4xf32>) {
+util.func public @tensorBarrier(%arg0: tensor<3xf32>, %arg1: tensor<4xf32>, %arg2: !hal.fence) -> (tensor<3xf32>, tensor<4xf32>) {
// CHECK: :2 = hal.tensor.barrier join(%arg0, %arg1 : tensor<3xf32>, tensor<4xf32>) => %arg2 : !hal.fence
%0:2 = hal.tensor.barrier join(%arg0, %arg1 : tensor<3xf32>, tensor<4xf32>) => %arg2 : !hal.fence
- return %0#0, %0#1 : tensor<3xf32>, tensor<4xf32>
+ util.return %0#0, %0#1 : tensor<3xf32>, tensor<4xf32>
}
// -----
@@ -57,8 +57,8 @@
// Demonstrates the full functionality of an extern dispatch op.
// Note that some fields are optional.
-// CHECK-LABEL: func.func @dispatchExtern
-func.func @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %arg2: i32) -> tensor<8xi32> {
+// CHECK-LABEL: util.func public @dispatchExtern
+util.func public @dispatchExtern(%arg0: tensor<4xi32>, %arg1: tensor<8xi32>, %arg2: i32) -> tensor<8xi32> {
// CHECK-DAG: %[[WORKLOAD_X:.+]] = arith.constant 100
%workload_x = arith.constant 100 : index
// CHECK-DAG: %[[WORKLOAD_Y:.+]] = arith.constant 50
@@ -113,6 +113,6 @@
// CHECK: } ordinal(300) = [#hal.executable.object<{path = "c.o"}>]
} ordinal(300) = [#hal.executable.object<{path = "c.o"}>]
})
- // CHECK: return %[[RESULT]]
- return %0 : tensor<8xi32>
+ // CHECK: util.return %[[RESULT]]
+ util.return %0 : tensor<8xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
index 3e7e4b9..25ff8b9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
@@ -228,7 +227,8 @@
// Create an exported benchmark function that runs the dispatches.
auto funcType =
moduleBuilder.getFunctionType({moduleBuilder.getI32Type()}, {});
- auto funcOp = moduleBuilder.create<func::FuncOp>(loc, baseName, funcType);
+ auto funcOp =
+ moduleBuilder.create<IREE::Util::FuncOp>(loc, baseName, funcType);
funcOp.setVisibility(SymbolTable::Visibility::Public);
// Mark the function as being a dispatch benchmark.
@@ -377,7 +377,7 @@
funcBuilder.create<IREE::Util::StatusCheckOkOp>(
loc, fenceOp.getStatus(), "failed to wait on timepoint");
- funcBuilder.create<mlir::func::ReturnOp>(loc);
+ funcBuilder.create<IREE::Util::ReturnOp>(loc);
}
// Builds a module exporting one function for each dispatch configuration
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
index 05c3868..2c4a595 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeDispatchInstrumentation.cpp
@@ -18,7 +18,6 @@
#include "iree/schemas/instruments/dispatch.h"
#include "iree/schemas/instruments/dispatch_def_builder.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
@@ -310,7 +309,7 @@
// Create query function for getting the instrumentation data.
auto listType = moduleBuilder.getType<IREE::Util::ListType>(
moduleBuilder.getType<IREE::Util::VariantType>());
- auto queryOp = moduleBuilder.create<func::FuncOp>(
+ auto queryOp = moduleBuilder.create<IREE::Util::FuncOp>(
loc, "__query_instruments",
moduleBuilder.getFunctionType({listType}, {}));
{
@@ -359,7 +358,7 @@
}
appendListItems(loc, listArg, iovecs, queryBuilder);
- queryBuilder.create<func::ReturnOp>(loc);
+ queryBuilder.create<IREE::Util::ReturnOp>(loc);
}
}
};
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
index 01ba030..e60747a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeResourceCaches.cpp
@@ -13,7 +13,6 @@
#include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -288,16 +287,16 @@
auto funcName = (StringRef("__constant_block_") +
std::to_string(nextUniqueConstantBlockId++))
.str();
- auto funcOp = moduleBuilder.create<func::FuncOp>(blockOp.getLoc(), funcName,
- blockOp.getFunctionType());
+ auto funcOp = moduleBuilder.create<IREE::Util::FuncOp>(
+ blockOp.getLoc(), funcName, blockOp.getFunctionType());
funcOp.setPrivate();
funcOp.getRegion().takeBody(blockOp.getRegion());
// Replace the hal.return with a func.return.
for (auto returnOp :
llvm::make_early_inc_range(funcOp.getOps<IREE::HAL::ReturnOp>())) {
- OpBuilder(returnOp).create<func::ReturnOp>(returnOp.getLoc(),
- returnOp.getOperands());
+ OpBuilder(returnOp).create<IREE::Util::ReturnOp>(returnOp.getLoc(),
+ returnOp.getOperands());
returnOp.erase();
}
@@ -306,8 +305,8 @@
if (funcOp.getNumArguments() > 0) {
callOperands.push_back(device);
}
- auto callOp = callerBuilder.create<func::CallOp>(blockOp.getLoc(), funcOp,
- callOperands);
+ auto callOp = callerBuilder.create<IREE::Util::CallOp>(
+ blockOp.getLoc(), funcOp, callOperands);
return llvm::map_to_vector(callOp.getResults(),
[](OpResult result) -> Value { return result; });
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td
index 0c8f376..0480bf8 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.td
@@ -410,7 +410,6 @@
];
let dependentDialects = [
"mlir::arith::ArithDialect",
- "mlir::func::FuncDialect",
"IREE::HAL::HALDialect",
"IREE::Stream::StreamDialect",
"IREE::Util::UtilDialect",
@@ -427,7 +426,6 @@
}];
let dependentDialects = [
"mlir::arith::ArithDialect",
- "mlir::func::FuncDialect",
"mlir::scf::SCFDialect",
"IREE::HAL::HALDialect",
"IREE::Util::UtilDialect",
@@ -511,7 +509,6 @@
];
let dependentDialects = [
"mlir::arith::ArithDialect",
- "mlir::func::FuncDialect",
"mlir::scf::SCFDialect",
"IREE::HAL::HALDialect",
"IREE::Util::UtilDialect",
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
index 5040104..590649e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
@@ -48,9 +48,9 @@
}
}
- // CHECK-LABEL: func.func @simpleDispatch
+ // CHECK-LABEL: util.func public @simpleDispatch
// CHECK-SAME: (%[[ARG0:.+]]: !hal.buffer_view, %[[ARG1:.+]]: !hal.buffer_view) -> !hal.buffer_view
- func.func @simpleDispatch(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
+ util.func public @simpleDispatch(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%c16 = arith.constant 16 : index
@@ -156,8 +156,8 @@
// CHECK-SAME: type(%[[ELEMENT_TYPE]])
// CHECK-SAME: encoding(%[[ENCODING_TYPE]])
%result_view = stream.tensor.export %result_ready : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view
- // CHECK: return
- return %result_view : !hal.buffer_view
+ // CHECK: util.return
+ util.return %result_view : !hal.buffer_view
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
index cdac0d8..c244510 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
@@ -66,7 +66,7 @@
// CHECK: %[[BUFFER:.+]] = hal.allocator.allocate<%{{.+}} : !hal.allocator> affinity(%{{.+}}) type("DeviceVisible|DeviceLocal") usage("{{.+}}Dispatch{{.+}}") : !hal.buffer{%c768}
// CHECK-NEXT: util.global.store %[[BUFFER]], @ex0_embedded_elf_x86_64_dispatch0_512_buffer : !hal.buffer
- // CHECK: func.func @ex0_embedded_elf_x86_64_dispatch0_512(%arg0: i32)
+ // CHECK: util.func public @ex0_embedded_elf_x86_64_dispatch0_512(%arg0: i32)
// CHECK-SAME: attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
// CHECK: %[[BATCH_SIZE:.+]] = arith.index_cast %arg0 : i32 to index
@@ -104,14 +104,14 @@
// ===========================================================================
// CHECK: util.global private mutable @ex0_embedded_elf_x86_64_dispatch1_512x1_buffer : !hal.buffer
- // CHECK: func.func @ex0_embedded_elf_x86_64_dispatch1_512x1(%arg0: i32)
+ // CHECK: util.func public @ex0_embedded_elf_x86_64_dispatch1_512x1(%arg0: i32)
// CHECK: hal.command_buffer.dispatch.symbol<%{{.+}} : !hal.command_buffer> target(@ex0::@embedded_elf_x86_64::@dispatch1)
// CHECK: util.global private mutable @ex0_embedded_elf_x86_64_dispatch1_128x32_buffer : !hal.buffer
- // CHECK: func.func @ex0_embedded_elf_x86_64_dispatch1_128x32(%arg0: i32)
+ // CHECK: util.func public @ex0_embedded_elf_x86_64_dispatch1_128x32(%arg0: i32)
// CHECK: hal.command_buffer.dispatch.symbol<%{{.+}} : !hal.command_buffer> target(@ex0::@embedded_elf_x86_64::@dispatch1)
- func.func private @main(%dynamic_arg: i32) -> !stream.timepoint {
+ util.func public @main(%dynamic_arg: i32) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
@@ -170,6 +170,6 @@
]}
} => !stream.timepoint
%39 = stream.resource.dealloca await(%6) => %result : !stream.resource<transient>{%c128} => !stream.timepoint
- return %39 : !stream.timepoint
+ util.return %39 : !stream.timepoint
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
index 781bda3..861d4e0 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
@@ -1,10 +1,10 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-hal-elide-redundant-commands))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-hal-elide-redundant-commands))' %s | FileCheck %s
// Tests that redundant barriers are elided but barriers gaurding ops are not.
// CHECK-LABEL: @elideRedundantBarriers
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout)
-func.func @elideRedundantBarriers(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
+util.func public @elideRedundantBarriers(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c42_i32 = arith.constant 42 : i32
@@ -16,14 +16,14 @@
hal.command_buffer.push_constants<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout) offset(0) values([%c42_i32]) : i32
// CHECK: hal.command_buffer.execution_barrier
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
// CHECK-LABEL: @elidePushConstants
-func.func @elidePushConstants(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
+util.func public @elidePushConstants(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
%c0 = arith.constant 0 : i32
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
@@ -43,14 +43,14 @@
layout(%pipeline_layout : !hal.pipeline_layout)
offset(0)
values([%c0, %c1]) : i32, i32
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
// CHECK-LABEL: @elidePushConstantsPrefix
-func.func @elidePushConstantsPrefix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
+util.func public @elidePushConstantsPrefix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
%c0 = arith.constant 0 : i32
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
@@ -70,14 +70,14 @@
layout(%pipeline_layout : !hal.pipeline_layout)
offset(1)
values([%c1]) : i32
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
// CHECK-LABEL: @elidePushConstantsSuffix
-func.func @elidePushConstantsSuffix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
+util.func public @elidePushConstantsSuffix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
%c0 = arith.constant 0 : i32
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
@@ -94,8 +94,8 @@
layout(%pipeline_layout : !hal.pipeline_layout)
offset(1)
values([%c0, %c2]) : i32, i32
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
@@ -104,7 +104,7 @@
// CHECK-LABEL: @elidePushDescriptorSet
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout, %[[BUFFER0:.+]]: !hal.buffer, %[[BUFFER1:.+]]: !hal.buffer)
-func.func @elidePushDescriptorSet(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout, %buffer0: !hal.buffer, %buffer1: !hal.buffer) {
+util.func public @elidePushDescriptorSet(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout, %buffer0: !hal.buffer, %buffer1: !hal.buffer) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[SIZE0:.+]] = arith.constant 100
@@ -124,6 +124,6 @@
%c0 = (%buffer0 : !hal.buffer)[%c0, %size0],
%c1 = (%buffer1 : !hal.buffer)[%c0, %size1]
])
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir
index a26a374..1a913ed 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/fixup_legacy_sync.mlir
@@ -5,10 +5,10 @@
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {legacy_sync}>]} {
// CHECK-LABEL: @command_buffer_reusable
-func.func @command_buffer_reusable(%arg0: !hal.device) {
+util.func public @command_buffer_reusable(%arg0: !hal.device) {
// CHECK: hal.command_buffer.create device(%arg0 : !hal.device) mode("None")
%cmd = hal.command_buffer.create device(%arg0 : !hal.device) mode("None") categories("Transfer|Dispatch") : !hal.command_buffer
- return
+ util.return
}
} // module
@@ -18,10 +18,10 @@
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {legacy_sync}>]} {
// CHECK-LABEL: @command_buffer_oneshot
-func.func @command_buffer_oneshot(%arg0: !hal.device) {
+util.func public @command_buffer_oneshot(%arg0: !hal.device) {
// CHECK: hal.command_buffer.create device(%arg0 : !hal.device) mode("OneShot|AllowInlineExecution")
%cmd = hal.command_buffer.create device(%arg0 : !hal.device) mode(OneShot) categories("Transfer|Dispatch") : !hal.command_buffer
- return
+ util.return
}
} // module
@@ -34,10 +34,10 @@
#hal.device.target<"vulkan", {}>
]} {
// CHECK-LABEL: @legacy_mode_not_required
-func.func @legacy_mode_not_required(%arg0: !hal.device) {
+util.func public @legacy_mode_not_required(%arg0: !hal.device) {
// CHECK: hal.command_buffer.create device(%arg0 : !hal.device) mode(OneShot)
%cmd = hal.command_buffer.create device(%arg0 : !hal.device) mode(OneShot) categories("Transfer|Dispatch") : !hal.command_buffer
- return
+ util.return
}
} // module
@@ -50,7 +50,7 @@
#hal.device.target<"vulkan", {legacy_sync}>
]} {
// CHECK-LABEL: @mixed_legacy_mode_required
-func.func @mixed_legacy_mode_required(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
+util.func public @mixed_legacy_mode_required(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
%affinity = arith.constant 0 : i64
// CHECK: hal.fence.await
// CHECK: hal.device.queue.execute
@@ -59,7 +59,7 @@
affinity(%affinity)
wait(%wait) signal(%signal)
commands([%cmd])
- return
+ util.return
}
} // module
@@ -70,7 +70,7 @@
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {legacy_sync}>]} {
// CHECK-LABEL: @blocking_execute
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[WAIT:.+]]: !hal.fence, %[[CMD:.+]]: !hal.command_buffer, %[[SIGNAL:.+]]: !hal.fence)
-func.func @blocking_execute(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
+util.func public @blocking_execute(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
%affinity = arith.constant 0 : i64
// CHECK-DAG: %[[NULL:.+]] = util.null : !hal.fence
// CHECK-DAG: hal.fence.await until([%[[WAIT]]])
@@ -82,7 +82,7 @@
affinity(%affinity)
wait(%wait) signal(%signal)
commands([%cmd])
- return
+ util.return
}
} // module
@@ -93,7 +93,7 @@
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {legacy_sync}>]} {
// CHECK-LABEL: @blocking_execute
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[WAIT:.+]]: !hal.fence, %[[CMD:.+]]: !hal.command_buffer, %[[SIGNAL:.+]]: !hal.fence)
-func.func @blocking_execute(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
+util.func public @blocking_execute(%device: !hal.device, %wait: !hal.fence, %cmd: !hal.command_buffer, %signal: !hal.fence) {
// CHECK-NEXT: %[[TIMEOUT:.+]] = arith.constant 100
%timeout = arith.constant 100 : i32
// CHECK-NEXT: hal.fence.await until([%[[WAIT]]]) timeout_millis(%[[TIMEOUT]])
@@ -111,7 +111,7 @@
commands([%cmd])
// CHECK-NEXT: hal.fence.await until([%[[SIGNAL]]]) timeout_millis(%[[TIMEOUT]])
hal.fence.await until([%signal]) timeout_millis(%timeout) : i32
- // CHECK-NEXT: return
- return
+ // CHECK-NEXT: util.return
+ util.return
}
} // module
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_dispatch_instrumentation.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_dispatch_instrumentation.mlir
index fcb1a75..c51b98f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_dispatch_instrumentation.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_dispatch_instrumentation.mlir
@@ -17,7 +17,7 @@
// CHECK: util.global.store %[[ALLOC_BUFFER]], @__dispatch_instrumentation
// Query function used by tools to get the buffers and metadata:
- // CHECK: func.func @__query_instruments(%[[LIST:.+]]: !util.list<?>)
+ // CHECK: util.func public @__query_instruments(%[[LIST:.+]]: !util.list<?>)
// CHECK: %[[INTERNAL_BUFFER:.+]] = util.global.load @__dispatch_instrumentation
// CHECK: %[[EXPORTED_BUFFER:.+]] = stream.tensor.export %[[INTERNAL_BUFFER]]
// CHECK: util.list.set %[[LIST]]{{.+}}
@@ -56,7 +56,7 @@
}
}
}
- func.func @main(%arg0: !stream.resource<external>) -> !stream.resource<external> {
+ util.func public @main(%arg0: !stream.resource<external>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%ret0 = stream.resource.alloc uninitialized : !stream.resource<external>{%c128}
@@ -75,6 +75,6 @@
}
} => !stream.timepoint
%ret0_ready = stream.timepoint.await %timepoint => %ret0 : !stream.resource<external>{%c128}
- return %ret0_ready : !stream.resource<external>
+ util.return %ret0_ready : !stream.resource<external>
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
index 0d02221..7697d0d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
@@ -48,7 +48,7 @@
}
}
}
- func.func @main(%arg0: !stream.resource<constant>, %arg1: !stream.resource<transient>, %arg2: index, %arg3: i32) -> !stream.resource<transient> {
+ util.func public @main(%arg0: !stream.resource<constant>, %arg1: !stream.resource<transient>, %arg2: index, %arg3: i32) -> !stream.resource<transient> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -67,7 +67,7 @@
}
} => !stream.timepoint
%2 = stream.timepoint.await %1 => %0 : !stream.resource<transient>{%arg2}
- return %2 : !stream.resource<transient>
+ util.return %2 : !stream.resource<transient>
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
index 3701456..d706534 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
@@ -13,7 +13,7 @@
// CHECK-NEXT: util.global.store %[[LAYOUT]], @_descriptor_set_layout_0 : !hal.descriptor_set_layout
// CHECK-LABEL: @descriptorSetLayoutLookup
-func.func @descriptorSetLayoutLookup(%device : !hal.device) -> !hal.descriptor_set_layout {
+util.func public @descriptorSetLayoutLookup(%device : !hal.device) -> !hal.descriptor_set_layout {
// CHECK-NEXT: %[[LAYOUT:.+]] = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
%0 = hal.descriptor_set_layout.lookup device(%device : !hal.device)
flags("None")
@@ -21,8 +21,8 @@
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<1, storage_buffer>
]) : !hal.descriptor_set_layout
- // CHECK-NEXT: return %[[LAYOUT]]
- return %0 : !hal.descriptor_set_layout
+ // CHECK-NEXT: util.return %[[LAYOUT]]
+ util.return %0 : !hal.descriptor_set_layout
}
// -----
@@ -40,7 +40,7 @@
// CHECK-NEXT: util.global.store %[[LAYOUT]], @_pipeline_layout_0 : !hal.pipeline_layout
// CHECK-LABEL: @exeLayoutLookup
-func.func @exeLayoutLookup(%device : !hal.device) -> !hal.pipeline_layout {
+util.func public @exeLayoutLookup(%device : !hal.device) -> !hal.pipeline_layout {
// CHECK: %[[LAYOUT:.+]] = util.global.load @_pipeline_layout_0 : !hal.pipeline_layout
%0 = hal.pipeline_layout.lookup device(%device : !hal.device)
layout(#hal.pipeline.layout<push_constants = 1, sets = [
@@ -49,8 +49,8 @@
#hal.descriptor_set.binding<1, storage_buffer>
]>
]>) : !hal.pipeline_layout
- // CHECK-NEXT: return %[[LAYOUT]]
- return %0 : !hal.pipeline_layout
+ // CHECK-NEXT: util.return %[[LAYOUT]]
+ util.return %0 : !hal.pipeline_layout
}
// -----
@@ -70,7 +70,7 @@
// CHECK-NEXT: util.global.store %[[LAYOUT]], @_pipeline_layout_0 : !hal.pipeline_layout
// CHECK-LABEL: @sharedLayoutLookup
-func.func @sharedLayoutLookup(%device : !hal.device) -> !hal.pipeline_layout {
+util.func public @sharedLayoutLookup(%device : !hal.device) -> !hal.pipeline_layout {
// CHECK: %[[LAYOUT:.+]] = util.global.load @_pipeline_layout_0 : !hal.pipeline_layout
%0 = hal.pipeline_layout.lookup device(%device : !hal.device)
layout(#hal.pipeline.layout<push_constants = 1, sets = [
@@ -83,12 +83,12 @@
#hal.descriptor_set.binding<1, uniform_buffer>
]>
]>) : !hal.pipeline_layout
- // CHECK-NEXT: return %[[LAYOUT]]
- return %0 : !hal.pipeline_layout
+ // CHECK-NEXT: util.return %[[LAYOUT]]
+ util.return %0 : !hal.pipeline_layout
}
// CHECK: @otherDescriptorSetLayoutLookup
-func.func @otherDescriptorSetLayoutLookup(%device : !hal.device) -> !hal.descriptor_set_layout {
+util.func public @otherDescriptorSetLayoutLookup(%device : !hal.device) -> !hal.descriptor_set_layout {
// CHECK: %[[LAYOUT:.+]] = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
%0 = hal.descriptor_set_layout.lookup device(%device : !hal.device)
flags(None)
@@ -96,8 +96,8 @@
#hal.descriptor_set.binding<0, storage_buffer>,
#hal.descriptor_set.binding<1, storage_buffer>
]) : !hal.descriptor_set_layout
- // CHECK-NEXT: return %[[LAYOUT]]
- return %0 : !hal.descriptor_set_layout
+ // CHECK-NEXT: util.return %[[LAYOUT]]
+ util.return %0 : !hal.descriptor_set_layout
}
// -----
@@ -180,8 +180,8 @@
// CHECK: %[[LAYOUT1:.+]] = util.global.load @_pipeline_layout_1 : !hal.pipeline_layout
// Constant block initializers:
-// CHECK: %[[CONST_01:.+]]:2 = func.call @__constant_block_0()
-// CHECK: %[[CONST_2:.+]] = func.call @__constant_block_1(%[[DEVICE]])
+// CHECK: %[[CONST_01:.+]]:2 = util.call @__constant_block_0()
+// CHECK: %[[CONST_2:.+]] = util.call @__constant_block_1(%[[DEVICE]])
// Executable creation:
// CHECK: %[[EXE:.+]] = hal.executable.create
@@ -202,26 +202,26 @@
// CHECK: util.global.store %[[RET]], @_executable_exe : !hal.executable
// Inlined constant block functions (here we ensure all blocks are cloned):
-// CHECK: func.func private @__constant_block_0() -> (i32, i32)
+// CHECK: util.func private @__constant_block_0() -> (i32, i32)
// CHECK-DAG: %[[C0:.+]] = arith.constant 123
// CHECK-DAG: %[[C1:.+]] = arith.constant 456
-// CHECK: return %[[C0]], %[[C1]]
-// CHECK: func.func private @__constant_block_1(%[[BLOCK_DEVICE:.+]]: !hal.device) -> i32
+// CHECK: util.return %[[C0]], %[[C1]]
+// CHECK: util.func private @__constant_block_1(%[[BLOCK_DEVICE:.+]]: !hal.device) -> i32
// CHECK: %[[OK:.+]], %[[VALUE:.+]] = hal.device.query<%[[BLOCK_DEVICE]] : !hal.device> key("sys" :: "baz")
// CHECK: cf.cond_br %[[OK]], ^bb1, ^bb2
// CHECK: ^bb1:
-// CHECK: return %[[VALUE]]
+// CHECK: util.return %[[VALUE]]
// CHECK: ^bb2:
// CHECK: %[[DUMMY:.+]] = arith.constant 0
-// CHECK: return %[[DUMMY]]
+// CHECK: util.return %[[DUMMY]]
// CHECK-LABEL: @exeLookup
-func.func @exeLookup(%device : !hal.device) -> !hal.executable {
+util.func public @exeLookup(%device : !hal.device) -> !hal.executable {
// CHECK: %[[EXE:.+]] = util.global.load @_executable_exe : !hal.executable
%0 = hal.executable.lookup device(%device : !hal.device)
executable(@exe) : !hal.executable
- // CHECK-NEXT: return %[[EXE]]
- return %0 : !hal.executable
+ // CHECK-NEXT: util.return %[[EXE]]
+ util.return %0 : !hal.executable
}
}
@@ -293,11 +293,11 @@
}
// CHECK-LABEL: @exeLookup
-func.func @exeLookup(%device : !hal.device) -> !hal.executable {
+util.func public @exeLookup(%device : !hal.device) -> !hal.executable {
// CHECK: %[[EXE:.+]] = util.global.load @_executable_exe : !hal.executable
%0 = util.global.load @_executable_exe : !hal.executable
- // CHECK-NEXT: return %[[EXE]]
- return %0 : !hal.executable
+ // CHECK-NEXT: util.return %[[EXE]]
+ util.return %0 : !hal.executable
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/memoize_device_queries.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/memoize_device_queries.mlir
index 1fd8492..5211bd9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/memoize_device_queries.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/memoize_device_queries.mlir
@@ -18,8 +18,8 @@
// CHECK: util.global private @_device_query_2
-// CHECK-LABEL: func.func @device_matchers
-func.func @device_matchers(%device : !hal.device) -> (i1, i1, i1, i1, i1, i1) {
+// CHECK-LABEL: util.func public @device_matchers
+util.func public @device_matchers(%device : !hal.device) -> (i1, i1, i1, i1, i1, i1) {
// Same queries (same variables):
// CHECK-NEXT: = util.global.load @_device_query_0_ok : i1
// CHECK-NEXT: = util.global.load @_device_query_0 : i1
@@ -34,5 +34,5 @@
// CHECK-NEXT: = util.global.load @_device_query_2 : i1
%id1_b_ok, %id1_b = hal.device.query<%device : !hal.device> key("hal.device.id" :: "id1") : i1, i1 = true
- return %id0_a_ok, %id0_a, %id0_b_ok, %id0_b, %id1_a, %id1_b : i1, i1, i1, i1, i1, i1
+ util.return %id0_a_ok, %id0_a, %id0_b_ok, %id0_b, %id1_a, %id1_b : i1, i1, i1, i1, i1, i1
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
index 7e60dbc..0d9d21a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
@@ -1,11 +1,11 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-hal-repeat-dispatches{count=2}))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-hal-repeat-dispatches{count=2}))' %s | FileCheck %s
util.global @_executable : !hal.executable
// CHECK-LABEL: @duplicate_dispatches
// CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[CMD2:.+]]: !hal.command_buffer)
-func.func @duplicate_dispatches(%cmd1 : !hal.command_buffer, %cmd2 : !hal.command_buffer) {
+util.func public @duplicate_dispatches(%cmd1 : !hal.command_buffer, %cmd2 : !hal.command_buffer) {
// CHECK: %[[EXE:.+]] = util.global.load @_executable
%exe = util.global.load @_executable : !hal.executable
@@ -19,7 +19,7 @@
hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[3] workgroups([%c2, %c2, %c2])
hal.command_buffer.execution_barrier<%cmd2 : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
- return
+ util.return
}
// CHECK: hal.command_buffer.dispatch<%[[CMD1]] : !hal.command_buffer> target(%[[EXE]] : !hal.executable)[0] workgroups([%c1, %c1, %c1])
@@ -49,7 +49,7 @@
// CHECK-LABEL: @nested_dispatch
// CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[IDX:.+]]: index)
-func.func @nested_dispatch(%cmd1 : !hal.command_buffer, %idx : index) {
+util.func public @nested_dispatch(%cmd1 : !hal.command_buffer, %idx : index) {
// CHECK: %[[EXE:.+]] = util.global.load @_executable
%exe = util.global.load @_executable : !hal.executable
@@ -62,7 +62,7 @@
default {
}
- return
+ util.return
}
// CHECK: scf.index_switch
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
index 22d6f35..748f48e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
@@ -15,7 +15,7 @@
// CHECK-LABEL: @dispatch_with_nested_references
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer
-func.func @dispatch_with_nested_references(%cmd : !hal.command_buffer) {
+util.func public @dispatch_with_nested_references(%cmd : !hal.command_buffer) {
%c10 = arith.constant 10 : index
%c11 = arith.constant 11 : index
%c12 = arith.constant 12 : index
@@ -29,13 +29,13 @@
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer>
target(@exe::@target::@entry)
workgroups([%c10, %c11, %c12])
- return
+ util.return
}
// -----
// CHECK-LABEL: @dispatch_already_using_ordinals
-func.func @dispatch_already_using_ordinals(
+util.func public @dispatch_already_using_ordinals(
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer
%cmd: !hal.command_buffer,
// CHECK-SAME: %[[EXE:.+]]: !hal.executable
@@ -50,7 +50,7 @@
hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
target(%exe : !hal.executable)[2]
workgroups([%c10, %c11, %c12])
- return
+ util.return
}
// -----
@@ -69,7 +69,7 @@
}
// CHECK-LABEL: @dispatch_indirect_with_nested_references
-func.func @dispatch_indirect_with_nested_references(
+util.func public @dispatch_indirect_with_nested_references(
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer
%cmd: !hal.command_buffer,
// CHECK-SAME: %[[BUF:.+]]: !hal.buffer
@@ -84,13 +84,13 @@
hal.command_buffer.dispatch.indirect.symbol<%cmd : !hal.command_buffer>
target(@exe::@target::@entry)
workgroups(%buf : !hal.buffer)[%c10]
- return
+ util.return
}
// -----
// CHECK-LABEL: @dispatch_indirect_already_using_ordinals
-func.func @dispatch_indirect_already_using_ordinals(
+util.func public @dispatch_indirect_already_using_ordinals(
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer
%cmd: !hal.command_buffer,
// CHECK-SAME: %[[EXE:.+]]: !hal.executable
@@ -105,5 +105,5 @@
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer>
target(%exe : !hal.executable)[0]
workgroups(%buf : !hal.buffer)[%c10]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
index 6d90583..cfa6152 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/verify_target_environment.mlir
@@ -2,28 +2,28 @@
// expected-error@+1 {{no HAL target devices specified}}
module @module {
- func.func private @func() -> ()
+ util.func private @func() -> ()
}
// -----
// expected-error@+1 {{no HAL target devices specified}}
module @module attributes {hal.device.targets = []} {
- func.func private @func() -> ()
+ util.func private @func() -> ()
}
// -----
// expected-error@+1 {{invalid target attr type}}
module @module attributes {hal.device.targets = ["wrong_type"]} {
- func.func private @func() -> ()
+ util.func private @func() -> ()
}
// -----
// expected-error@+1 {{unregistered target backend "foo"}}
module @module attributes {hal.device.targets = [#hal.device.target<"foo">]} {
- func.func private @func() -> ()
+ util.func private @func() -> ()
}
// -----
@@ -35,7 +35,7 @@
// CHECK: module @module attributes {hal.device.targets = [#device_target_vmvx]}
module @module attributes {hal.device.targets = [#device_target_vmvx]} {
- func.func private @func() -> ()
+ util.func private @func() -> ()
}
// -----
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/call_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/call_ops.mlir
index 37d37a1..d9a425c 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/call_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/call_ops.mlir
@@ -8,14 +8,14 @@
// CHECK-LABEL: @basicCall
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[SIZE0:.+]]: index, %[[DIM0:.+]]: index)
-func.func @basicCall(%arg0: tensor<?xf32>, %dim0: index) -> tensor<?xf32> {
+util.func public @basicCall(%arg0: tensor<?xf32>, %dim0: index) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?xf32>{%[[DIM0]]}
// CHECK: %[[CALL:.+]] = stream.async.call @basicExtern
// CHECK-SAME: (%[[ARG0]][%c0 to %[[SIZE0]] for %[[SIZE0]]], %[[DIM0]]) : (!stream.resource<*>{%[[SIZE0]]}, index) -> !stream.resource<*>{%[[RESULT_SIZE]]}
%call = flow.call @basicExtern(%arg0, %dim0) : (tensor<?xf32>{%dim0}, index) -> tensor<?xf32>{%dim0}
- // CHECK: return %[[CALL]], %[[RESULT_SIZE]]
- return %call : tensor<?xf32>
+ // CHECK: util.return %[[CALL]], %[[RESULT_SIZE]]
+ util.return %call : tensor<?xf32>
}
// -----
@@ -28,12 +28,12 @@
// CHECK-LABEL: @inplaceCall
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[SIZE0:.+]]: index, %[[DIM0:.+]]: index)
-func.func @inplaceCall(%arg0: tensor<?xf32>, %dim0: index) -> tensor<?xf32> {
+util.func public @inplaceCall(%arg0: tensor<?xf32>, %dim0: index) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
// CHECK: %[[CALL:.+]] = stream.async.call @inplaceExtern(%[[ARG0]][%c0 to %[[SIZE0]] for %[[SIZE0]]], %[[DIM0]]) : (!stream.resource<*>{%[[SIZE0]]}, index) -> %[[ARG0]]{%[[SIZE0]]}
%call = flow.call @inplaceExtern(%arg0, %dim0) : (tensor<?xf32>{%dim0}, index) -> %arg0{%dim0}
- // CHECK: return %[[CALL]], %[[SIZE0]]
- return %call : tensor<?xf32>
+ // CHECK: util.return %[[CALL]], %[[SIZE0]]
+ util.return %call : tensor<?xf32>
}
// -----
@@ -46,10 +46,10 @@
// CHECK-LABEL: @inplaceTypeChangeCall
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[SIZE0:.+]]: index, %[[DIM0:.+]]: index)
-func.func @inplaceTypeChangeCall(%arg0: tensor<?x4xf32>, %dim0: index) -> tensor<4x?xi32> {
+util.func public @inplaceTypeChangeCall(%arg0: tensor<?x4xf32>, %dim0: index) -> tensor<4x?xi32> {
%c0 = arith.constant 0 : index
// CHECK: %[[CALL:.+]] = stream.async.call @inplaceTypeChangeExtern(%[[ARG0]][%c0 to %[[SIZE0]] for %[[SIZE0]]], %[[DIM0]]) : (!stream.resource<*>{%[[SIZE0]]}, index) -> %[[ARG0]]{%[[SIZE0]]}
%call = flow.call @inplaceTypeChangeExtern(%arg0, %dim0) : (tensor<?x4xf32>{%dim0}, index) -> %arg0 as tensor<4x?xi32>{%dim0}
- // CHECK: return %[[CALL]], %[[SIZE0]]
- return %call : tensor<4x?xi32>
+ // CHECK: util.return %[[CALL]], %[[SIZE0]]
+ util.return %call : tensor<4x?xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/collective_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/collective_ops.mlir
index c44f92e..64a2a65 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/collective_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/collective_ops.mlir
@@ -2,88 +2,88 @@
// CHECK-LABEL: @channel_split
// CHECK-SAME: (%[[BASE_CHANNEL:.+]]: !stream.channel)
-func.func @channel_split(%base_channel: !flow.channel) -> !flow.channel {
+util.func public @channel_split(%base_channel: !flow.channel) -> !flow.channel {
// CHECK-DAG: %[[COLOR:.+]] = arith.constant 100 : index
%color = arith.constant 100 : index
// CHECK-DAG: %[[KEY:.+]] = arith.constant 101 : index
%key = arith.constant 101 : index
// CHECK: %[[SPLIT_CHANNEL:.+]] = stream.channel.split %[[BASE_CHANNEL]], %[[COLOR]], %[[KEY]] : !stream.channel -> !stream.channel
%split_channel = flow.channel.split %base_channel, %color, %key : !flow.channel -> !flow.channel
- // CHECK: return %[[SPLIT_CHANNEL]]
- return %split_channel : !flow.channel
+ // CHECK: util.return %[[SPLIT_CHANNEL]]
+ util.return %split_channel : !flow.channel
}
// -----
// CHECK-LABEL: @channel_rank
// CHECK-SAME: (%[[CHANNEL:.+]]: !stream.channel)
-func.func @channel_rank(%channel: !flow.channel) -> index {
+util.func public @channel_rank(%channel: !flow.channel) -> index {
// CHECK: %[[RANK:.+]] = stream.channel.rank %[[CHANNEL]] : index
- // CHECK: return %[[RANK]] : index
+ // CHECK: util.return %[[RANK]] : index
%rank = flow.channel.rank %channel : index
- return %rank : index
+ util.return %rank : index
}
// -----
// CHECK-LABEL: @channel_count
// CHECK-SAME: (%[[CHANNEL:.+]]: !stream.channel)
-func.func @channel_count(%channel: !flow.channel) -> index {
+util.func public @channel_count(%channel: !flow.channel) -> index {
// CHECK: %[[COUNT:.+]] = stream.channel.count %[[CHANNEL]] : index
- // CHECK: return %[[COUNT]] : index
+ // CHECK: util.return %[[COUNT]] : index
%count = flow.channel.count %channel : index
- return %count : index
+ util.return %count : index
}
// -----
// CHECK-LABEL: @all_reduce_sum
-func.func @all_reduce_sum(%channel: !flow.channel, %arg0: tensor<2304xf32>) -> tensor<2304xf32> {
+util.func public @all_reduce_sum(%channel: !flow.channel, %arg0: tensor<2304xf32>) -> tensor<2304xf32> {
// CHECK: stream.tensor.empty : tensor<2304xf32>
// CHECK: stream.async.collective<all_reduce with sum : f32>
%0 = flow.tensor.empty : tensor<2304xf32>
%1 = flow.collective.all_reduce sum, f32, %0, %arg0, %channel : (tensor<2304xf32>, tensor<2304xf32>, !flow.channel) -> tensor<2304xf32>
- return %1 : tensor<2304xf32>
+ util.return %1 : tensor<2304xf32>
}
// -----
// CHECK-LABEL: @all_gather
-func.func @all_gather(%channel: !flow.channel, %arg0: tensor<512xf32>) -> tensor<1024xf32> {
+util.func public @all_gather(%channel: !flow.channel, %arg0: tensor<512xf32>) -> tensor<1024xf32> {
// CHECK: stream.tensor.empty : tensor<1024xf32>
// CHECK: stream.async.collective<all_gather : f32>
%0 = flow.tensor.empty : tensor<1024xf32>
%1 = flow.collective.all_gather f32, %0, %arg0, %channel : (tensor<1024xf32>, tensor<512xf32>, !flow.channel) -> tensor<1024xf32>
- return %1 : tensor<1024xf32>
+ util.return %1 : tensor<1024xf32>
}
// -----
// CHECK-LABEL: @all_to_all
-func.func @all_to_all(%channel: !flow.channel, %arg0: tensor<1024xf32>) -> tensor<1024xf32> {
+util.func public @all_to_all(%channel: !flow.channel, %arg0: tensor<1024xf32>) -> tensor<1024xf32> {
// CHECK: stream.tensor.empty : tensor<1024xf32>
// CHECK: stream.async.collective<all_to_all : f32>
%0 = flow.tensor.empty : tensor<1024xf32>
%1 = flow.collective.all_to_all f32, %0, %arg0, %channel : (tensor<1024xf32>, tensor<1024xf32>, !flow.channel) -> tensor<1024xf32>
- return %1 : tensor<1024xf32>
+ util.return %1 : tensor<1024xf32>
}
// -----
// CHECK-LABEL: @reduce_scatter
-func.func @reduce_scatter(%channel: !flow.channel, %arg0: tensor<4x2xf32>) -> tensor<2x2xf32> {
+util.func public @reduce_scatter(%channel: !flow.channel, %arg0: tensor<4x2xf32>) -> tensor<2x2xf32> {
// CHECK: stream.tensor.empty : tensor<2x2xf32>
// CHECK: stream.async.collective<reduce_scatter with sum : f32>
%0 = flow.tensor.empty : tensor<2x2xf32>
%1 = flow.collective.reduce_scatter sum, f32, %0, %arg0, %channel : (tensor<2x2xf32>, tensor<4x2xf32>, !flow.channel) -> tensor<2x2xf32>
- return %1 : tensor<2x2xf32>
+ util.return %1 : tensor<2x2xf32>
}
// -----
// CHECK-LABEL: @send_recv
// CHECK-SAME: index, %[[SEND:.+]]: index, %[[RECV:.+]]: index)
-func.func @send_recv(%channel: !flow.channel, %arg0: tensor<1024xf32>, %send: index, %recv: index) -> tensor<1024xf32> {
+util.func public @send_recv(%channel: !flow.channel, %arg0: tensor<1024xf32>, %send: index, %recv: index) -> tensor<1024xf32> {
// CHECK: stream.tensor.empty : tensor<1024xf32>
// CHECK-DAG: %[[CST_LO_MASK:.+]] = arith.constant 65535 : i32
// CHECK-DAG: %[[CST_SHIFT16:.+]] = arith.constant 16 : i32
@@ -96,5 +96,5 @@
// CHECK-SAME: source_target_pair(%[[PARAM]])
%0 = flow.tensor.empty : tensor<1024xf32>
%1 = flow.collective.send_recv f32, %0, %arg0, %channel, %send, %recv : (tensor<1024xf32>, tensor<1024xf32>, !flow.channel, index, index) -> tensor<1024xf32>
- return %1 : tensor<1024xf32>
+ util.return %1 : tensor<1024xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
index 964a97b..da75704 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/dispatch_ops.mlir
@@ -2,20 +2,20 @@
// CHECK-LABEL: @dispatchNoWorkload
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
-func.func @dispatchNoWorkload(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
+util.func public @dispatchNoWorkload(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
// CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@entry(%[[INPUT]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]]) :
// CHECK-SAME: (!stream.resource<*>{%[[INPUT_SIZE]]}) -> !stream.resource<*>{%[[RESULT_SIZE]]}
%0 = flow.dispatch @ex::@entry(%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
// return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<?x?x1024xf32>
+ util.return %0 : tensor<?x?x1024xf32>
}
// -----
// CHECK-LABEL: @dispatch
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
-func.func @dispatch(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
+util.func public @dispatch(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> tensor<?x?x1024xf32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
@@ -24,14 +24,14 @@
// CHECK-SAME: (!stream.resource<*>{%[[INPUT_SIZE]]}) -> !stream.resource<*>{%[[RESULT_SIZE]]}
%0 = flow.dispatch @ex::@entry[%c1, %c2, %c3](%input) : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim1, %dim3}
// return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<?x?x1024xf32>
+ util.return %0 : tensor<?x?x1024xf32>
}
// -----
// CHECK-LABEL: @tiedDispatch
// CHECK-SAME: (%[[INPUT0:.+]]: !stream.resource<*>, %[[INPUT0_SIZE:.+]]: index, %[[INPUT1:.+]]: !stream.resource<*>, %[[INPUT1_SIZE:.+]]: index)
-func.func @tiedDispatch(%input0: tensor<i32>, %input1: tensor<2x3xi32>) -> tensor<3x9xi32> {
+util.func public @tiedDispatch(%input0: tensor<i32>, %input1: tensor<2x3xi32>) -> tensor<3x9xi32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
@@ -40,15 +40,15 @@
%0 = flow.dispatch @ex::@entry0[%c1, %c2, %c3](%input0) : (tensor<i32>) -> tensor<3x9xi32>
// CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@entry1[%c1, %c2, %c3](%[[INPUT1]][%c0 to %[[INPUT1_SIZE]] for %[[INPUT1_SIZE]]], %[[T]][%c0 to %[[T_SIZE]] for %[[T_SIZE]]]) : (!stream.resource<*>{%[[INPUT1_SIZE]]}, !stream.resource<*>{%[[T_SIZE]]}) -> %[[T]]{%[[T_SIZE]]}
%1 = flow.dispatch @ex::@entry1[%c1, %c2, %c3](%input1, %0) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %0
- // CHECK: return %[[RESULT]], %[[T_SIZE]] : !stream.resource<*>, index
- return %1 : tensor<3x9xi32>
+ // CHECK: util.return %[[RESULT]], %[[T_SIZE]] : !stream.resource<*>, index
+ util.return %1 : tensor<3x9xi32>
}
// -----
// CHECK-LABEL: @dispatchAffinity
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM1:.+]]: index, %[[DIM3:.+]]: index)
-func.func @dispatchAffinity(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> (tensor<?x?x1024xf32>, tensor<?x?x1024xf32>) {
+util.func public @dispatchAffinity(%input: tensor<7x?x24x?xf32>, %dim1: index, %dim3: index) -> (tensor<?x?x1024xf32>, tensor<?x?x1024xf32>) {
// CHECK: %[[RESULT0_SIZE:.+]] = stream.tensor.sizeof on(#hal.affinity.queue<[0]>) tensor<?x?x1024xf32>{%[[DIM1]], %[[DIM3]]}
// CHECK: %[[RESULT0:.+]] = stream.async.dispatch on(#hal.affinity.queue<[0]>) @ex::@entry0(%[[INPUT]][%c0 to %[[INPUT_SIZE]] for %[[INPUT_SIZE]]])
%0 = flow.dispatch @ex::@entry0(%input) {
@@ -60,5 +60,5 @@
stream.affinity = #hal.affinity.queue<[1]>
} : (tensor<7x?x24x?xf32>{%dim1, %dim3}) -> tensor<?x?x1024xf32>{%dim3, %dim1}
// return %[[RESULT0]], %[[RESULT0_SIZE]], %[[RESULT1]], %[[RESULT1_SIZE]]
- return %0, %1 : tensor<?x?x1024xf32>, tensor<?x?x1024xf32>
+ util.return %0, %1 : tensor<?x?x1024xf32>, tensor<?x?x1024xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
index d221ddf..d45d1f5 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
@@ -19,9 +19,9 @@
flow.return %arg0, %arg0, %arg0 : index, index, index
}
builtin.module {
- // CHECK: func.func @dispatch()
- func.func @dispatch() {
- return
+ // CHECK: util.func public @dispatch()
+ util.func public @dispatch() {
+ util.return
}
}
}
@@ -32,13 +32,13 @@
flow.executable private @rank_0_binding {
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[INPUT:.+]]: !stream.binding)
- func.func @dispatch(%input: !flow.dispatch.tensor<readonly:tensor<i64>>) {
+ // CHECK: util.func public @dispatch(%[[INPUT:.+]]: !stream.binding)
+ util.func public @dispatch(%input: !flow.dispatch.tensor<readonly:tensor<i64>>) {
// CHECK: %[[SUBSPAN:.+]] = stream.binding.subspan %[[INPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<i64>>
// CHECK: = flow.dispatch.tensor.load %[[SUBSPAN]]
%tied_input = flow.dispatch.tensor.load %input, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i64>> -> tensor<i64>
util.optimization_barrier %tied_input : tensor<i64>
- return
+ util.return
}
}
}
@@ -49,8 +49,8 @@
flow.executable private @static_bindings {
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
- func.func @dispatch(%input: !flow.dispatch.tensor<readonly:tensor<1x4xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<4xf32>>) {
+ // CHECK: util.func public @dispatch(%[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
+ util.func public @dispatch(%input: !flow.dispatch.tensor<readonly:tensor<1x4xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<4xf32>>) {
// CHECK-DAG: %[[TIED_INPUT:.+]] = stream.binding.subspan %[[INPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<1x4xf32>>
// CHECK-DAG: %[[TIED_OUTPUT:.+]] = stream.binding.subspan %[[OUTPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<4xf32>>
%tied_input = flow.dispatch.tie_shape %input : !flow.dispatch.tensor<readonly:tensor<1x4xf32>>
@@ -60,7 +60,7 @@
// CHECK: flow.dispatch.tensor.store %[[TILE]], %[[TIED_OUTPUT]]
%tile = flow.dispatch.tensor.load %tied_input, offsets = [0, 0], sizes = [1, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4xf32>> -> tensor<4xf32>
flow.dispatch.tensor.store %tile, %tied_output, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:tensor<4xf32>>
- return
+ util.return
}
}
}
@@ -71,8 +71,8 @@
flow.executable private @dynamic_bindings {
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[DIM:.+]]: index, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
- func.func @dispatch(%dim: index, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
+ // CHECK: util.func public @dispatch(%[[DIM:.+]]: index, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
+ util.func public @dispatch(%dim: index, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
// CHECK-DAG: %[[TIED_INPUT:.+]] = stream.binding.subspan %[[INPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%[[DIM]]}
// CHECK-DAG: %[[TIED_OUTPUT:.+]] = stream.binding.subspan %[[OUTPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%[[DIM]]}
%tied_input = flow.dispatch.tie_shape %input : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%dim}
@@ -82,7 +82,7 @@
// CHECK: flow.dispatch.tensor.store %[[TILE]], %[[TIED_OUTPUT]]
%tile = flow.dispatch.tensor.load %tied_input, offsets = [0, 0], sizes = [1, %dim], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%dim} -> tensor<?xf32>
flow.dispatch.tensor.store %tile, %tied_output, offsets = [0], sizes = [%dim], strides = [1] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%dim}
- return
+ util.return
}
}
}
@@ -93,8 +93,8 @@
flow.executable private @indirect_dynamic_bindings {
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[DIM_TENSOR:.+]]: !stream.binding, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
- func.func @dispatch(%dim_tensor: !flow.dispatch.tensor<readonly:tensor<i64>>, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
+ // CHECK: util.func public @dispatch(%[[DIM_TENSOR:.+]]: !stream.binding, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
+ util.func public @dispatch(%dim_tensor: !flow.dispatch.tensor<readonly:tensor<i64>>, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
// CHECK: %[[DIM_SUBSPAN:.+]] = stream.binding.subspan %[[DIM_TENSOR]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<i64>>
// CHECK: %[[DIM_TILE:.+]] = flow.dispatch.tensor.load %[[DIM_SUBSPAN]]
// CHECK: %[[DIM_I64:.+]] = tensor.extract %[[DIM_TILE]][] : tensor<i64>
@@ -112,7 +112,7 @@
// CHECK: flow.dispatch.tensor.store %[[TILE]], %[[TIED_OUTPUT]]
%tile = flow.dispatch.tensor.load %tied_input, offsets = [0, 0], sizes = [1, %dim], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%dim} -> tensor<?xf32>
flow.dispatch.tensor.store %tile, %tied_output, offsets = [0], sizes = [%dim], strides = [1] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%dim}
- return
+ util.return
}
}
}
@@ -123,8 +123,8 @@
flow.executable private @nested_bindings {
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[DIM:.+]]: index, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
- func.func @dispatch(%dim: index, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
+ // CHECK: util.func public @dispatch(%[[DIM:.+]]: index, %[[INPUT:.+]]: !stream.binding, %[[OUTPUT:.+]]: !stream.binding)
+ util.func public @dispatch(%dim: index, %input: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>, %output: !flow.dispatch.tensor<writeonly:tensor<?xf32>>) {
// CHECK-DAG: stream.dispatch.workgroup.size[0] : index
%workgroup_size_0 = flow.dispatch.workgroup.size[0] : index
// CHECK-DAG: stream.dispatch.workgroup.id[0] : index
@@ -146,7 +146,7 @@
%tile = flow.dispatch.tensor.load %tied_input, offsets = [0, %arg3], sizes = [1, %7], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%dim} -> tensor<?xf32>
flow.dispatch.tensor.store %tile, %tied_output, offsets = [%arg3], sizes = [%7], strides = [1] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%dim}
}
- return
+ util.return
}
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
index 2bbd064..7de878f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/tensor_ops.mlir
@@ -2,33 +2,33 @@
// CHECK-LABEL: @tensorReshapePassThrough
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index)
-func.func @tensorReshapePassThrough(%input: tensor<5x24x48xf32>) -> tensor<30x2x96xf32> {
+util.func public @tensorReshapePassThrough(%input: tensor<5x24x48xf32>) -> tensor<30x2x96xf32> {
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<30x2x96xf32> : index
// CHECK: %[[RESULT:.+]] = stream.tensor.clone %[[INPUT]] : tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<30x2x96xf32> in !stream.resource<*>{%[[RESULT_SIZE]]}
%0 = flow.tensor.reshape %input : tensor<5x24x48xf32> -> tensor<30x2x96xf32>
- // CHECK: return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<30x2x96xf32>
+ // CHECK: util.return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<30x2x96xf32>
}
// -----
// CHECK-LABEL: @tensorReshapeWithSingleUse
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index)
-func.func @tensorReshapeWithSingleUse(%input: tensor<5x24x48xf32>) -> tensor<30x2x96xf32> {
+util.func public @tensorReshapeWithSingleUse(%input: tensor<5x24x48xf32>) -> tensor<30x2x96xf32> {
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<30x2x96xf32> : index
// CHECK: %[[RESHAPE:.+]] = stream.tensor.clone %[[INPUT]] : tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<30x2x96xf32> in !stream.resource<*>{%[[RESULT_SIZE]]}
%0 = flow.tensor.reshape %input : tensor<5x24x48xf32> -> tensor<30x2x96xf32>
// CHECK: %[[RESULT:.+]] = stream.tensor.clone %[[RESHAPE]] : tensor<30x2x96xf32> in !stream.resource<*>{%[[RESULT_SIZE]]} -> tensor<30x2x96xf32> in !stream.resource<*>{%[[RESULT_SIZE]]}
%1 = flow.tensor.clone %0 : tensor<30x2x96xf32>
- // CHECK: return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
- return %1 : tensor<30x2x96xf32>
+ // CHECK: util.return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
+ util.return %1 : tensor<30x2x96xf32>
}
// -----
// CHECK-LABEL: @tensorReshapeWithMultipleUses
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index)
-func.func @tensorReshapeWithMultipleUses(%input: tensor<5x24x48xf32>)
+util.func public @tensorReshapeWithMultipleUses(%input: tensor<5x24x48xf32>)
-> (tensor<60x2x48xf32>, tensor<30x2x96xf32>) {
// CHECK: %[[T0:.+]] = stream.tensor.clone %[[INPUT]] : tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]}
%1 = flow.tensor.clone %input : tensor<5x24x48xf32>
@@ -40,65 +40,65 @@
// CHECK: %[[T3_SIZE:.+]] = stream.tensor.sizeof tensor<30x2x96xf32> : index
// CHECK: %[[T3:.+]] = stream.tensor.clone %[[T0]] : tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<30x2x96xf32> in !stream.resource<*>{%[[T3_SIZE]]}
%4 = flow.tensor.reshape %1 : tensor<5x24x48xf32> -> tensor<30x2x96xf32>
- // CHECK: return %[[T2]], %[[T1_SIZE]], %[[T3]], %[[T3_SIZE]] : !stream.resource<*>, index, !stream.resource<*>, index
- return %3, %4 : tensor<60x2x48xf32>, tensor<30x2x96xf32>
+ // CHECK: util.return %[[T2]], %[[T1_SIZE]], %[[T3]], %[[T3_SIZE]] : !stream.resource<*>, index, !stream.resource<*>, index
+ util.return %3, %4 : tensor<60x2x48xf32>, tensor<30x2x96xf32>
}
// -----
// CHECK-LABEL: @tensorBitCastWithSingleUse
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index)
-func.func @tensorBitCastWithSingleUse(%input: tensor<5x24x48xi8>) -> tensor<30x2x192xi4> {
+util.func public @tensorBitCastWithSingleUse(%input: tensor<5x24x48xi8>) -> tensor<30x2x192xi4> {
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<30x2x192xi4> : index
// CHECK: %[[BITCAST:.+]] = stream.tensor.clone %[[INPUT]] : tensor<5x24x48xi8> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<30x2x192xi4> in !stream.resource<*>{%[[RESULT_SIZE]]}
%0 = flow.tensor.bitcast %input : tensor<5x24x48xi8> -> tensor<30x2x192xi4>
// CHECK: %[[RESULT:.+]] = stream.tensor.clone %[[BITCAST]] : tensor<30x2x192xi4> in !stream.resource<*>{%[[RESULT_SIZE]]} -> tensor<30x2x192xi4> in !stream.resource<*>{%[[RESULT_SIZE]]}
%1 = flow.tensor.clone %0 : tensor<30x2x192xi4>
- // CHECK: return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
- return %1 : tensor<30x2x192xi4>
+ // CHECK: util.return %[[RESULT]], %[[RESULT_SIZE]] : !stream.resource<*>, index
+ util.return %1 : tensor<30x2x192xi4>
}
// -----
// CHECK-LABEL: @tensorAlloca
// CHECK-SAME: (%[[DIM0:.+]]: index)
-func.func @tensorAlloca(%dim0: index) -> tensor<?x0xf32> {
+util.func public @tensorAlloca(%dim0: index) -> tensor<?x0xf32> {
// CHECK: %[[ALLOCA_SIZE:.+]] = stream.tensor.sizeof tensor<?x0xf32>{%[[DIM0]]}
// CHECK: %[[ALLOCA:.+]] = stream.async.alloca : !stream.resource<*>{%[[ALLOCA_SIZE]]}
%0 = flow.tensor.alloca : tensor<?x0xf32>{%dim0}
- // CHECK: return %[[ALLOCA]]
- return %0 : tensor<?x0xf32>
+ // CHECK: util.return %[[ALLOCA]]
+ util.return %0 : tensor<?x0xf32>
}
// -----
// CHECK-LABEL: @tensorEmpty
// CHECK-SAME: (%[[DIM0:.+]]: index)
-func.func @tensorEmpty(%dim0: index) -> tensor<?x0xf32> {
+util.func public @tensorEmpty(%dim0: index) -> tensor<?x0xf32> {
// CHECK: %[[EMPTY_SIZE:.+]] = stream.tensor.sizeof tensor<?x0xf32>{%[[DIM0]]}
// CHECK: %[[EMPTY:.+]] = stream.tensor.empty : tensor<?x0xf32>{%[[DIM0]]} in !stream.resource<*>{%[[EMPTY_SIZE]]}
%0 = flow.tensor.empty : tensor<?x0xf32>{%dim0}
- // CHECK: return %[[EMPTY]]
- return %0 : tensor<?x0xf32>
+ // CHECK: util.return %[[EMPTY]]
+ util.return %0 : tensor<?x0xf32>
}
// -----
// CHECK-LABEL: @tensorSplat
// CHECK-SAME: (%[[VALUE:.+]]: i8, %[[DIM0:.+]]: index)
-func.func @tensorSplat(%value: i8, %dim0: index) -> tensor<?x128xi8> {
+util.func public @tensorSplat(%value: i8, %dim0: index) -> tensor<?x128xi8> {
// CHECK: %[[T_SIZE:.+]] = stream.tensor.sizeof tensor<?x128xi8>{%[[DIM0]]} : index
// CHECK: %[[T:.+]] = stream.tensor.splat %[[VALUE]] : i8 -> tensor<?x128xi8>{%[[DIM0]]} in !stream.resource<*>{%[[T_SIZE]]}
%0 = flow.tensor.splat %value : tensor<?x128xi8>{%dim0}
- // CHECK: return %[[T]], %[[T_SIZE]]
- return %0 : tensor<?x128xi8>
+ // CHECK: util.return %[[T]], %[[T_SIZE]]
+ util.return %0 : tensor<?x128xi8>
}
// -----
// CHECK-LABEL: @tensorSlice
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index)
-func.func @tensorSlice(%input : tensor<5x24x48xf32>) -> tensor<3x24x48xf32> {
+util.func public @tensorSlice(%input : tensor<5x24x48xf32>) -> tensor<3x24x48xf32> {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
@@ -107,28 +107,28 @@
// CHECK: %[[T_SIZE:.+]] = stream.tensor.sizeof tensor<3x24x48xf32> : index
// CHECK: %[[T:.+]] = stream.tensor.slice %[[INPUT]][%c2, %c0, %c0 for %c3, %c24, %c48] : tensor<5x24x48xf32> in !stream.resource<*>{%[[INPUT_SIZE]]} -> tensor<3x24x48xf32> in !stream.resource<*>{%[[T_SIZE]]}
%0 = flow.tensor.slice %input[%c2, %c0, %c0 for %c3, %c24, %c48] : tensor<5x24x48xf32> -> tensor<3x24x48xf32>
- // CHECK: return %[[T]], %[[T_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<3x24x48xf32>
+ // CHECK: util.return %[[T]], %[[T_SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<3x24x48xf32>
}
// -----
// CHECK-LABEL: @tensorUpdate
// CHECK-SAME: (%[[UPDATE:.+]]: !stream.resource<*>, %[[UPDATE_SIZE:.+]]: index, %[[TARGET:.+]]: !stream.resource<*>, %[[TARGET_SIZE:.+]]: index)
-func.func @tensorUpdate(%update : tensor<1x1x10xf32>, %target : tensor<5x1x10xf32>) -> tensor<5x1x10xf32> {
+util.func public @tensorUpdate(%update : tensor<1x1x10xf32>, %target : tensor<5x1x10xf32>) -> tensor<5x1x10xf32> {
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
// CHECK: %[[T:.+]] = stream.tensor.update %[[UPDATE]], %[[TARGET]][%c4, %c1, %c1] : tensor<1x1x10xf32> in !stream.resource<*>{%[[UPDATE_SIZE]]} -> tensor<5x1x10xf32> in %[[TARGET]] as !stream.resource<*>{%[[TARGET_SIZE]]}
%0 = flow.tensor.update %update, %target[%c4, %c1, %c1] : tensor<1x1x10xf32> -> %target as tensor<5x1x10xf32>
- // CHECK: return %[[T]], %[[TARGET_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<5x1x10xf32>
+ // CHECK: util.return %[[T]], %[[TARGET_SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<5x1x10xf32>
}
// -----
// CHECK-LABEL: @tensorLoad
// CHECK-SAME: (%[[SOURCE:.+]]: !stream.resource<*>, %[[SOURCE_SIZE:.+]]: index)
-func.func @tensorLoad(%source : tensor<2x3xi32>) -> i32 {
+util.func public @tensorLoad(%source : tensor<2x3xi32>) -> i32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: %[[T0:.+]] = stream.async.transfer from(#hal.affinity.queue<[0, 1]>) %[[SOURCE]] :
@@ -137,15 +137,15 @@
%0 = flow.tensor.load %source[%c0, %c1] : tensor<2x3xi32> attributes {
stream.affinity = #hal.affinity.queue<[0, 1]>
}
- // CHECK: return %[[T1]]
- return %0 : i32
+ // CHECK: util.return %[[T1]]
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @tensorStore
// CHECK-SAME: (%[[TARGET:.+]]: !stream.resource<*>, %[[TARGET_SIZE:.+]]: index)
-func.func @tensorStore(%target : tensor<2x3xi32>) -> tensor<2x3xi32> {
+util.func public @tensorStore(%target : tensor<2x3xi32>) -> tensor<2x3xi32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c9 = arith.constant 9 : i32
@@ -158,15 +158,15 @@
%0 = flow.tensor.store %c9, %target[%c0, %c1] : tensor<2x3xi32> attributes {
stream.affinity = #hal.affinity.queue<[0, 1]>
}
- // CHECK: return %[[T2]]
- return %0 : tensor<2x3xi32>
+ // CHECK: util.return %[[T2]]
+ util.return %0 : tensor<2x3xi32>
}
// -----
// CHECK-LABEL: @tensorTrace
// CHECK-SAME: (%[[TENSOR0:.+]]: !stream.resource<*>, %[[TENSOR0_SIZE:.+]]: index, %[[TENSOR1:.+]]: !stream.resource<*>, %[[TENSOR1_SIZE:.+]]: index, %[[TENSOR1_DIM0:.+]]: index, %[[TENSOR1_DIM2:.+]]: index)
-func.func @tensorTrace(%tensor0: tensor<5xf32>, %tensor1: tensor<?x3x?xi32>, %tensor1_dim0: index, %tensor1_dim2: index) {
+util.func public @tensorTrace(%tensor0: tensor<5xf32>, %tensor1: tensor<?x3x?xi32>, %tensor1_dim0: index, %tensor1_dim2: index) {
// CHECK-DAG: %[[TENSOR0_STAGED:.+]] = stream.async.transfer %[[TENSOR0]] : !stream.resource<*>{%[[TENSOR0_SIZE]]} -> !stream.resource<staging>{%[[TENSOR0_SIZE]]}
// CHECK-DAG: %[[TENSOR1_STAGED:.+]] = stream.async.transfer %[[TENSOR1]] : !stream.resource<*>{%[[TENSOR1_SIZE]]} -> !stream.resource<staging>{%[[TENSOR1_SIZE]]}
// CHECK: stream.tensor.trace "FOOBAR" = [
@@ -177,5 +177,5 @@
%tensor0 : tensor<5xf32>,
%tensor1 : tensor<?x3x?xi32>{%tensor1_dim0, %tensor1_dim2}
]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/test/abi_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/test/abi_ops.mlir
index 1ffde57..8dc6705 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/test/abi_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/test/abi_ops.mlir
@@ -3,7 +3,7 @@
// CHECK-LABEL: @importBufferView
// CHECK-SAME: (%[[VIEW:.+]]: !hal.buffer_view)
// CHECK-SAME: -> (!stream.resource<*>, index)
-func.func @importBufferView(%view: !hal.buffer_view) -> tensor<?x?x4xf32> {
+util.func public @importBufferView(%view: !hal.buffer_view) -> tensor<?x?x4xf32> {
// CHECK-DAG: %[[DIM0:.+]] = hal.buffer_view.dim{{.+}}[0]
%dim0 = hal.buffer_view.dim<%view : !hal.buffer_view>[0] : index
// CHECK-DAG: %[[DIM1:.+]] = hal.buffer_view.dim{{.+}}[1]
@@ -14,23 +14,23 @@
// CHECK-NEXT: %[[RESULT:.+]] = stream.async.transfer %[[RESOURCE]] :
// CHECK-SAME: !stream.resource<external>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
%0 = hal.tensor.import %view : !hal.buffer_view -> tensor<?x?x4xf32>{%dim0, %dim1}
- // CHECK: return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
- return %0 : tensor<?x?x4xf32>
+ // CHECK: util.return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<?x?x4xf32>
}
// -----
// CHECK-LABEL: @importBufferViewBitcasting
// CHECK-SAME: (%[[VIEW:.+]]: !hal.buffer_view) -> (!stream.resource<*>, index)
-func.func @importBufferViewBitcasting(%view: !hal.buffer_view) -> tensor<4xbf16> {
+util.func public @importBufferViewBitcasting(%view: !hal.buffer_view) -> tensor<4xbf16> {
// CHECK-DAG: %[[SIZE:.+]] = stream.tensor.sizeof tensor<4xbf16>
// CHECK: %[[RESOURCE:.+]] = stream.tensor.import %[[VIEW]] : !hal.buffer_view ->
// CHECK-SAME: tensor<2xui32> in !stream.resource<external>{%[[SIZE]]}
// CHECK-NEXT: %[[RESULT:.+]] = stream.async.transfer %[[RESOURCE]] :
// CHECK-SAME: !stream.resource<external>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
%0 = hal.tensor.import %view : !hal.buffer_view -> tensor<2xui32> as tensor<4xbf16>
- // CHECK: return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
- return %0 : tensor<4xbf16>
+ // CHECK: util.return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<4xbf16>
}
// -----
@@ -38,7 +38,7 @@
// CHECK-LABEL: @importBufferViewAsync
// CHECK-SAME: (%[[VIEW:.+]]: !hal.buffer_view, %[[FENCE:.+]]: !hal.fence)
// CHECK-SAME: -> (!stream.resource<*>, index)
-func.func @importBufferViewAsync(%view: !hal.buffer_view, %fence: !hal.fence) -> tensor<4xf32> {
+util.func public @importBufferViewAsync(%view: !hal.buffer_view, %fence: !hal.fence) -> tensor<4xf32> {
// CHECK-DAG: %[[SIZE:.+]] = stream.tensor.sizeof tensor<4xf32>
// CHECK: %[[ASYNC_RESOURCE:.+]] = stream.tensor.import %[[VIEW]]
// CHECK-SAME: : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%[[SIZE]]}
@@ -48,30 +48,30 @@
// CHECK-NEXT: %[[RESULT:.+]] = stream.async.transfer %[[SYNC_RESOURCE]]
// CHECK-SAME: : !stream.resource<external>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
%0 = hal.tensor.import wait(%fence) => %view : !hal.buffer_view -> tensor<4xf32>
- // CHECK: return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
- return %0 : tensor<4xf32>
+ // CHECK: util.return %[[RESULT]], %[[SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<4xf32>
}
// -----
// CHECK-LABEL: @exportBufferView
// CHECK-SAME: (%[[TENSOR:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
-func.func @exportBufferView(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index) -> !hal.buffer_view {
+util.func public @exportBufferView(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index) -> !hal.buffer_view {
// CHECK: %[[VIEW:.+]] = stream.async.transfer %[[TENSOR]] :
// CHECK-SAME: !stream.resource<*>{%[[SIZE]]} -> !stream.resource<external>{%[[SIZE]]}
// CHECK-NEXT: %[[RESULT:.+]] = stream.tensor.export %[[VIEW]] :
// CHECK-SAME: tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} in !stream.resource<external>{%[[SIZE]]}
// CHECK-SAME: -> !hal.buffer_view
%0 = hal.tensor.export %tensor : tensor<?x?x4xf32>{%dim0, %dim1} -> !hal.buffer_view
- // CHECK: return %[[RESULT]]
- return %0 : !hal.buffer_view
+ // CHECK: util.return %[[RESULT]]
+ util.return %0 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @exportBufferViewInPlace
// CHECK-SAME: (%[[TENSOR:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[STORAGE:.+]]: !hal.buffer)
-func.func @exportBufferViewInPlace(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index, %storage: !hal.buffer) -> !hal.buffer_view {
+util.func public @exportBufferViewInPlace(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index, %storage: !hal.buffer) -> !hal.buffer_view {
// CHECK: %[[STORAGE_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} : index
// CHECK-NEXT: %[[STORAGE_IMPORT:.+]] = stream.tensor.import %[[STORAGE]]
// CHECK-SAME: : !hal.buffer -> tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} in !stream.resource<external>{%[[STORAGE_SIZE]]}
@@ -81,8 +81,8 @@
// CHECK-SAME: tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} in !stream.resource<external>{%[[STORAGE_SIZE]]}
// CHECK-SAME: -> !hal.buffer_view
%0 = hal.tensor.export %tensor into(%storage : !hal.buffer) : tensor<?x?x4xf32>{%dim0, %dim1} -> !hal.buffer_view
- // CHECK: return %[[STORAGE_RESULT]]
- return %0 : !hal.buffer_view
+ // CHECK: util.return %[[STORAGE_RESULT]]
+ util.return %0 : !hal.buffer_view
}
// -----
@@ -91,10 +91,10 @@
// CHECK-LABEL: @exportBufferViewInPlaceToView
// CHECK-SAME: (%[[TENSOR:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index, %[[STORAGE:.+]]: !hal.buffer_view)
-func.func @exportBufferViewInPlaceToView(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index, %storage: !hal.buffer_view) -> !hal.buffer_view {
+util.func public @exportBufferViewInPlaceToView(%tensor: tensor<?x?x4xf32>, %dim0: index, %dim1: index, %storage: !hal.buffer_view) -> !hal.buffer_view {
// CHECK: %[[STORAGE_SIZE:.+]] = stream.tensor.sizeof tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} : index
// CHECK-NEXT: %[[STORAGE_IMPORT:.+]] = stream.tensor.import %[[STORAGE]]
// CHECK-SAME: : !hal.buffer_view -> tensor<?x?x4xf32>{%[[DIM0]], %[[DIM1]]} in !stream.resource<external>{%[[STORAGE_SIZE]]}
%0 = hal.tensor.export %tensor into(%storage : !hal.buffer_view) : tensor<?x?x4xf32>{%dim0, %dim1} -> !hal.buffer_view
- return %0 : !hal.buffer_view
+ util.return %0 : !hal.buffer_view
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
index 93b7e4f..46c1c83 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp
@@ -11,6 +11,32 @@
namespace mlir::iree_compiler {
+void expandResourceOperand(Location loc, Value operand,
+ SmallVectorImpl<Value> &newOperands,
+ OpBuilder &builder) {
+ if (llvm::isa<TensorType>(operand.getType())) {
+ auto value = consumeTensorOperand(loc, operand, builder);
+ newOperands.push_back(value.resource);
+ newOperands.push_back(value.resourceSize);
+ } else if (llvm::isa<IREE::Stream::ResourceType>(operand.getType())) {
+ newOperands.push_back(operand);
+ newOperands.push_back(
+ builder.createOrFold<IREE::Stream::ResourceSizeOp>(loc, operand));
+ } else {
+ newOperands.push_back(operand);
+ }
+}
+
+SmallVector<Value> expandResourceOperands(Location loc, ValueRange operands,
+ ConversionPatternRewriter &rewriter) {
+ SmallVector<Value> expandedOperands;
+ expandedOperands.reserve(operands.size());
+ for (auto operand : operands) {
+ expandResourceOperand(loc, operand, expandedOperands, rewriter);
+ }
+ return expandedOperands;
+}
+
ConvertedTensor consumeTensorOperand(Location loc, Value operand,
OpBuilder &builder) {
auto operandType = operand.getType();
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
index 0d4ba17..a7a864f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h
@@ -13,6 +13,13 @@
namespace mlir::iree_compiler {
+void expandResourceOperand(Location loc, Value operand,
+ SmallVectorImpl<Value> &newOperands,
+ OpBuilder &builder);
+
+SmallVector<Value> expandResourceOperands(Location loc, ValueRange operands,
+ ConversionPatternRewriter &rewriter);
+
// https://reviews.llvm.org/D111620 broke 1->N type expansion during dialect
// conversion. It inserts unrealized_conversion_casts but then passes the
// illegal source dialect types for pattern operands, meaning that even though
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/BUILD.bazel
index 17a87b0..e5ab347 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/BUILD.bazel
@@ -28,7 +28,6 @@
"@llvm-project//llvm:Support",
"@llvm-project//mlir:ArithDialect",
"@llvm-project//mlir:ControlFlowDialect",
- "@llvm-project//mlir:FuncDialect",
"@llvm-project//mlir:FunctionInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MemRefDialect",
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/CMakeLists.txt
index d46e22a..d1462bc 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/CMakeLists.txt
@@ -23,7 +23,6 @@
LLVMSupport
MLIRArithDialect
MLIRControlFlowDialect
- MLIRFuncDialect
MLIRFunctionInterfaces
MLIRIR
MLIRMemRefDialect
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
index 6a1afe7..1ea3b7d 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/ConvertStructuralOps.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -26,140 +25,6 @@
namespace {
-struct FuncOpSignatureConversion
- : public OpConversionPattern<mlir::func::FuncOp> {
- using OpConversionPattern::OpConversionPattern;
- LogicalResult
- matchAndRewrite(mlir::func::FuncOp funcOp, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- auto &typeConverter = *getTypeConverter();
-
- // Convert the input signature types.
- // TODO(benvanik): dynamic shapes by passing in tensor dynamic dims.
- auto originalType = funcOp.getFunctionType();
- TypeConverter::SignatureConversion newSignature(
- originalType.getNumInputs());
- for (auto argType : llvm::enumerate(originalType.getInputs())) {
- if (failed(typeConverter.convertSignatureArg(
- argType.index(), argType.value(), newSignature))) {
- return failure();
- }
- }
- SmallVector<Type> newResultTypes;
- if (failed(typeConverter.convertTypes(originalType.getResults(),
- newResultTypes))) {
- return failure();
- }
-
- // Replace function.
- auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
- newFuncOp.getBlocks().clear();
- rewriter.inlineRegionBefore(funcOp.getFunctionBody(),
- newFuncOp.getFunctionBody(), newFuncOp.end());
- newFuncOp.setType(rewriter.getFunctionType(newSignature.getConvertedTypes(),
- newResultTypes));
- if (failed(rewriter.convertRegionTypes(&newFuncOp.getFunctionBody(),
- typeConverter, &newSignature))) {
- return failure();
- }
-
- rewriter.eraseOp(funcOp);
- return success();
- }
-};
-
-static SmallVector<Value>
-expandResourceOperands(Location loc, ValueRange operands,
- ConversionPatternRewriter &rewriter) {
- SmallVector<Value> expandedOperands;
- expandedOperands.reserve(operands.size());
- for (auto operand : operands) {
- if (llvm::isa<TensorType>(operand.getType())) {
- auto value = consumeTensorOperand(loc, operand, rewriter);
- expandedOperands.push_back(value.resource);
- expandedOperands.push_back(value.resourceSize);
- } else if (llvm::isa<IREE::Stream::ResourceType>(operand.getType())) {
- expandedOperands.push_back(operand);
- expandedOperands.push_back(
- rewriter.createOrFold<IREE::Stream::ResourceSizeOp>(loc, operand));
- } else {
- expandedOperands.push_back(operand);
- }
- }
- return expandedOperands;
-}
-
-struct CallOpConversion : public OpConversionPattern<mlir::func::CallOp> {
- using OpConversionPattern::OpConversionPattern;
- LogicalResult
- matchAndRewrite(mlir::func::CallOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- // Expand any resource operands to resource + size.
- auto expandedOperands =
- expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
-
- // Expand any resource results to resource + size.
- SmallVector<Type> expandedTypes;
- struct Result {
- size_t originalIndex;
- size_t newIndex;
- Type newType;
- };
- SmallVector<Result> resultMap;
- for (auto originalType : llvm::enumerate(op.getResultTypes())) {
- SmallVector<Type> newTypes;
- if (failed(getTypeConverter()->convertType(originalType.value(),
- newTypes))) {
- return rewriter.notifyMatchFailure(op,
- "unable to convert result types");
- }
- resultMap.push_back(
- Result{originalType.index(), expandedTypes.size(), newTypes.front()});
- expandedTypes.append(newTypes);
- }
-
- // Create a new call that takes the expanded input operands and returns the
- // expanded output results. We can't directly replace the original call as
- // the result counts differ.
- auto callOp = rewriter.create<mlir::func::CallOp>(
- op.getLoc(), expandedTypes, op.getCallee(), expandedOperands);
-
- // Tie all resource results together so we end up with 1:1 results with the
- // original op.
- SmallVector<Value> results;
- for (auto result : resultMap) {
- if (llvm::isa<IREE::Stream::ResourceType>(result.newType)) {
- auto oldType = op.getResult(result.originalIndex).getType();
- auto resource = callOp.getResult(result.newIndex + 0);
- auto resourceSize = callOp.getResult(result.newIndex + 1);
- results.push_back(rewriter
- .create<mlir::UnrealizedConversionCastOp>(
- op.getLoc(), TypeRange{oldType},
- ValueRange{resource, resourceSize})
- .getResult(0));
- } else {
- results.push_back(callOp.getResult(result.newIndex));
- }
- }
- rewriter.replaceOp(op, results);
-
- return success();
- }
-};
-
-struct ReturnOpConversion : public OpConversionPattern<mlir::func::ReturnOp> {
- using OpConversionPattern::OpConversionPattern;
- LogicalResult
- matchAndRewrite(mlir::func::ReturnOp op, OpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- // Expand any resource operands to resource + size.
- auto expandedOperands =
- expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
- rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(op, expandedOperands);
- return success();
- }
-};
-
struct BranchOpConversion : public OpConversionPattern<mlir::cf::BranchOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
@@ -495,6 +360,19 @@
} // namespace
+template <typename OpT>
+static inline void addGenericLegalOp(ConversionTarget &conversionTarget,
+ TypeConverter &typeConverter) {
+ conversionTarget.addDynamicallyLegalOp<OpT>([&](OpT op) {
+ return llvm::all_of(
+ op->getOperandTypes(),
+ [&typeConverter](Type t) { return typeConverter.isLegal(t); }) &&
+ llvm::all_of(op->getResultTypes(), [&typeConverter](Type t) {
+ return typeConverter.isLegal(t);
+ });
+ });
+}
+
void populateStandardStructuralToStreamPatterns(
MLIRContext *context, ConversionTarget &conversionTarget,
TypeConverter &typeConverter, RewritePatternSet &patterns) {
@@ -504,82 +382,25 @@
// dynamic legality checker to force any ops using such types to run through
// our patterns.
- conversionTarget.addDynamicallyLegalOp<mlir::func::FuncOp>(
- [&](mlir::func::FuncOp op) {
- return typeConverter.isSignatureLegal(op.getFunctionType()) &&
- typeConverter.isLegal(&op.getBody());
- });
- conversionTarget.addDynamicallyLegalOp<mlir::func::CallOp>(
- [&](mlir::func::CallOp op) {
- return llvm::all_of(
- op.getOperandTypes(),
- [&](Type type) { return typeConverter.isLegal(type); }) &&
- llvm::all_of(op.getResultTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::func::ReturnOp>(
- [&](mlir::func::ReturnOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
-
- conversionTarget.addDynamicallyLegalOp<mlir::cf::BranchOp>(
- [&](mlir::cf::BranchOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::cf::CondBranchOp>(
- [&](mlir::cf::CondBranchOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::cf::SwitchOp>(
- [&](mlir::cf::SwitchOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::scf::IfOp>(
- [&](mlir::scf::IfOp op) {
- return llvm::all_of(op.getResultTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::scf::ForOp>(
- [&](mlir::scf::ForOp op) {
- return llvm::all_of(op.getResultTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::scf::WhileOp>(
- [&](mlir::scf::WhileOp op) {
- return llvm::all_of(op.getResultTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::scf::ConditionOp>(
- [&](mlir::scf::ConditionOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
- conversionTarget.addDynamicallyLegalOp<mlir::scf::YieldOp>(
- [&](mlir::scf::YieldOp op) {
- return llvm::all_of(op.getOperandTypes(), [&](Type type) {
- return typeConverter.isLegal(type);
- });
- });
-
+ addGenericLegalOp<mlir::cf::BranchOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::cf::CondBranchOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::cf::SwitchOp>(conversionTarget, typeConverter);
patterns
- .insert<FuncOpSignatureConversion, CallOpConversion, ReturnOpConversion,
- BranchOpConversion, CondBranchOpConversion, SwitchOpConversion,
- SelectOpConversion, ScfConditionOpConversion, ScfIfOpConversion,
- ScfForOpConversion, ScfWhileOpConversion, ScfYieldOpConversion>(
+ .insert<BranchOpConversion, CondBranchOpConversion, SwitchOpConversion>(
typeConverter, context);
+
+ addGenericLegalOp<mlir::arith::SelectOp>(conversionTarget, typeConverter);
+ patterns.insert<SelectOpConversion>(typeConverter, context);
+
+ addGenericLegalOp<mlir::scf::IfOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::scf::ForOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::scf::WhileOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::scf::ConditionOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<mlir::scf::YieldOp>(conversionTarget, typeConverter);
+ patterns
+ .insert<ScfConditionOpConversion, ScfIfOpConversion, ScfForOpConversion,
+ ScfWhileOpConversion, ScfYieldOpConversion>(typeConverter,
+ context);
}
} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/constant_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/constant_ops.mlir
index e0ed3d5..be63175 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/constant_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/constant_ops.mlir
@@ -1,19 +1,19 @@
// RUN: iree-opt --split-input-file --iree-stream-conversion %s | FileCheck %s
// CHECK-LABEL: @constantTensor
-func.func @constantTensor() {
+util.func public @constantTensor() {
// CHECK: %[[CST:.+]] = stream.tensor.constant : tensor<2xi32> in !stream.resource<constant> = dense<[1, 2]> : tensor<2xi32>
// CHECK: %[[SIZE:.+]] = stream.resource.size %[[CST]] : !stream.resource<constant>
// CHECK: %[[T:.+]] = stream.async.transfer %[[CST]] : !stream.resource<constant>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
%0 = arith.constant dense<[1, 2]> : tensor<2xi32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @emptyTensor
-func.func @emptyTensor() {
+util.func public @emptyTensor() {
// CHECK: %[[CST:.+]] = stream.tensor.constant : tensor<2x0xi32> in !stream.resource<constant> = dense<> : tensor<2x0xi32>
%0 = arith.constant dense<> : tensor<2x0xi32>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/structural_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/structural_ops.mlir
index 2c5f0c6..1f5293b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/structural_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/test/structural_ops.mlir
@@ -1,35 +1,15 @@
// RUN: iree-opt --split-input-file --iree-stream-conversion %s | FileCheck %s
-// CHECK-LABEL: @functionExpansion
-// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[ARG0_SIZE:.+]]: index,
-// CHECK-SAME: %[[ARG1:.+]]: i1,
-// CHECK-SAME: %[[ARG2:.+]]: !stream.resource<*>, %[[ARG2_SIZE:.+]]: index)
-// CHECK-SAME: -> (!stream.resource<*>, index, i1, !stream.resource<*>, index)
-func.func @functionExpansion(%arg0: tensor<4x?xf32>, %arg1: i1, %arg2: tensor<i32>)
- -> (tensor<4x?xf32>, i1, tensor<i32>) {
- // CHECK-NEXT: %[[RET:.+]]:5 = call @callee(%[[ARG0]], %[[ARG0_SIZE]], %[[ARG1]], %[[ARG2]], %[[ARG2_SIZE]])
- // CHECK-SAME: : (!stream.resource<*>, index, i1, !stream.resource<*>, index) -> (!stream.resource<*>, index, i1, !stream.resource<*>, index)
- %0:3 = call @callee(%arg0, %arg1, %arg2) : (tensor<4x?xf32>, i1, tensor<i32>) -> (tensor<4x?xf32>, i1, tensor<i32>)
- // CHECK: return %[[RET]]#0, %[[RET]]#1, %[[RET]]#2, %[[RET]]#3, %[[RET]]#4 : !stream.resource<*>, index, i1, !stream.resource<*>, index
- return %0#0, %0#1, %0#2 : tensor<4x?xf32>, i1, tensor<i32>
-}
-
-// CHECK: func.func private @callee
-func.func private @callee(%arg0: tensor<4x?xf32>, %arg1: i1, %arg2: tensor<i32>)
- -> (tensor<4x?xf32>, i1, tensor<i32>)
-
-// -----
-
// CHECK-LABEL: @brExpansion
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[ARG0_SIZE:.+]]: index, %arg2: i1)
// CHECK-SAME: -> (!stream.resource<*>, index, i1)
-func.func @brExpansion(%arg0: tensor<1xf32>, %arg1: i1) -> (tensor<1xf32>, i1) {
+util.func public @brExpansion(%arg0: tensor<1xf32>, %arg1: i1) -> (tensor<1xf32>, i1) {
// CHECK: cf.br ^bb1(%[[ARG0]], %[[ARG0_SIZE]], %arg2 : !stream.resource<*>, index, i1)
cf.br ^bb1(%arg0, %arg1 : tensor<1xf32>, i1)
// CHECK: ^bb1(%[[BB_ARG0:.+]]: !stream.resource<*>, %[[BB_ARG1:.+]]: index, %[[BB_ARG2:.+]]: i1):
^bb1(%0: tensor<1xf32>, %1: i1):
- // CHECK: return %[[BB_ARG0]], %[[BB_ARG1]], %[[BB_ARG2]] : !stream.resource<*>, index, i1
- return %0, %1 : tensor<1xf32>, i1
+ // CHECK: util.return %[[BB_ARG0]], %[[BB_ARG1]], %[[BB_ARG2]] : !stream.resource<*>, index, i1
+ util.return %0, %1 : tensor<1xf32>, i1
}
// -----
@@ -38,14 +18,14 @@
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[ARG0_SIZE:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: !stream.resource<*>, %[[ARG1_SIZE:.+]]: index)
// CHECK-SAME: -> (!stream.resource<*>, index)
-func.func @condBrExpansion(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
+util.func public @condBrExpansion(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
%true = arith.constant 1 : i1
// CHECK: cf.cond_br %true,
// CHECK-SAME: ^bb1(%[[ARG0]], %[[ARG0_SIZE]] : !stream.resource<*>, index),
// CHECK-SAME: ^bb1(%[[ARG1]], %[[ARG1_SIZE]] : !stream.resource<*>, index)
cf.cond_br %true, ^bb1(%arg0 : tensor<1xf32>), ^bb1(%arg1 : tensor<1xf32>)
^bb1(%0: tensor<1xf32>):
- return %0 : tensor<1xf32>
+ util.return %0 : tensor<1xf32>
}
// -----
@@ -54,7 +34,7 @@
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[ARG0_SIZE:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: !stream.resource<*>, %[[ARG1_SIZE:.+]]: index)
// CHECK-SAME: -> (!stream.resource<*>, index)
-func.func @switchExpansion(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
+util.func public @switchExpansion(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
%flag = arith.constant 1 : i32
// CHECK: %[[FLAG:.+]] = arith.constant 1 : i32
// CHECK: cf.switch %[[FLAG]] : i32, [
@@ -66,9 +46,9 @@
0: ^bb2(%arg1 : tensor<1xf32>)
]
^bb1(%0: tensor<1xf32>):
- return %0 : tensor<1xf32>
+ util.return %0 : tensor<1xf32>
^bb2(%1: tensor<1xf32>):
- return %1 : tensor<1xf32>
+ util.return %1 : tensor<1xf32>
}
// -----
@@ -78,19 +58,19 @@
// CHECK-SAME: %[[COND:.+]]: i1,
// CHECK-SAME: %[[ARG1:.+]]: !stream.resource<*>, %[[ARG1_SIZE:.+]]: index)
// CHECK-SAME: -> (!stream.resource<*>, index)
-func.func @selectExpansion(%arg0: tensor<1xf32>, %cond: i1, %arg1: tensor<1xf32>) -> tensor<1xf32> {
+util.func public @selectExpansion(%arg0: tensor<1xf32>, %cond: i1, %arg1: tensor<1xf32>) -> tensor<1xf32> {
// CHECK-DAG: %[[RET:.+]] = arith.select %[[COND]], %[[ARG0]], %[[ARG1]] : !stream.resource<*>
// CHECK-DAG: %[[RET_SIZE:.+]] = arith.select %[[COND]], %[[ARG0_SIZE]], %[[ARG1_SIZE]] : index
%0 = arith.select %cond, %arg0, %arg1 : tensor<1xf32>
- // CHECK: return %[[RET]], %[[RET_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<1xf32>
+ // CHECK: util.return %[[RET]], %[[RET_SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<1xf32>
}
// -----
// CHECK-LABEL: @scfIfExpansion
// CHECK-SAME: %[[COND:.+]]: i1, %[[ARG0:.+]]: !stream.resource<*>, %[[IDX0:.+]]: index, %[[ARG1:.+]]: !stream.resource<*>, %[[IDX1:.+]]: index
-func.func @scfIfExpansion(%cond: i1, %arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
+util.func public @scfIfExpansion(%cond: i1, %arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
// CHECK: %[[IF:.+]]:2 = scf.if %arg0 -> (!stream.resource<*>, index)
%0 = scf.if %cond -> tensor<1xf32> {
// CHECK: scf.yield %[[ARG0]], %[[IDX0]]
@@ -99,15 +79,15 @@
// CHECK: scf.yield %[[ARG1]], %[[IDX1]]
scf.yield %arg1 : tensor<1xf32>
}
- // CHECK: return %[[IF]]#0, %[[IF]]#1
- return %0 : tensor<1xf32>
+ // CHECK: util.return %[[IF]]#0, %[[IF]]#1
+ util.return %0 : tensor<1xf32>
}
// -----
// CHECK-LABEL: @scfWhileExpansion
// CHECK-SAME: %[[ARG0:.+]]: i32, %[[ARG1:.+]]: !stream.resource<*>, %[[ARG2:.+]]: index
-func.func @scfWhileExpansion(%arg0 : i32, %arg1 : tensor<1xf32>) {
+util.func public @scfWhileExpansion(%arg0 : i32, %arg1 : tensor<1xf32>) {
%c1 = arith.constant 1 : i32
%c10 = arith.constant 10 : i32
// CHECK: scf.while
@@ -124,7 +104,7 @@
// CHECK: scf.yield %[[V:.+]], %[[ARG1]], %[[ARG2]] : i32, !stream.resource<*>, index
scf.yield %1, %arg1 : i32, tensor<1xf32>
}
- return
+ util.return
}
// -----
@@ -133,7 +113,7 @@
// CHECK-SAME: %[[ARG0:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: !stream.resource<*>,
// CHECK-SAME: %[[ARG2:.+]]: index
-func.func @scfWhileExpansion(%arg0 : index, %arg1 : tensor<1xf32>) {
+util.func public @scfWhileExpansion(%arg0 : index, %arg1 : tensor<1xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -143,5 +123,5 @@
scf.for %i = %c0 to %arg0 step %c1 iter_args(%arg2 = %arg1) -> (tensor<1xf32>) {
scf.yield %arg2 : tensor<1xf32>
}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/BUILD.bazel
index 0484ee7..4a01cc1 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/BUILD.bazel
@@ -25,7 +25,6 @@
"//compiler/src/iree/compiler/Dialect/Stream/IR",
"//compiler/src/iree/compiler/Dialect/Util/Conversion",
"//compiler/src/iree/compiler/Dialect/Util/IR",
- "@llvm-project//mlir:FuncDialect",
"@llvm-project//mlir:FunctionInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Transforms",
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/CMakeLists.txt
index ff93dbd..e3957bb 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/CMakeLists.txt
@@ -18,7 +18,6 @@
SRCS
"Patterns.cpp"
DEPS
- MLIRFuncDialect
MLIRFunctionInterfaces
MLIRIR
MLIRTransforms
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
index c73549f..ad2ba06 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
@@ -20,6 +20,122 @@
namespace {
//===----------------------------------------------------------------------===//
+// Structural ops
+//===----------------------------------------------------------------------===//
+
+struct FuncOpSignatureConversion
+ : public OpConversionPattern<IREE::Util::FuncOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::FuncOp funcOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto &typeConverter = *getTypeConverter();
+
+ // Replace function and convert the signature for region conversion below.
+ TypeConverter::SignatureConversion newSignature(funcOp.getNumArguments());
+ auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
+ bool anyFailed = false;
+ newFuncOp.expandSignature(
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ if (failed(typeConverter.convertTypes(type, newTypes))) {
+ anyFailed = true;
+ }
+ if (failed(
+ typeConverter.convertSignatureArg(i, type, newSignature))) {
+ anyFailed = true;
+ }
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ if (failed(typeConverter.convertTypes(type, newTypes))) {
+ anyFailed = true;
+ }
+ });
+ if (anyFailed) {
+ return rewriter.notifyMatchFailure(
+ funcOp, "unable to convert argument/result types");
+ }
+ newFuncOp.getBlocks().clear();
+ rewriter.inlineRegionBefore(funcOp.getFunctionBody(),
+ newFuncOp.getFunctionBody(), newFuncOp.end());
+ if (failed(rewriter.convertRegionTypes(&newFuncOp.getFunctionBody(),
+ typeConverter, &newSignature))) {
+ return failure();
+ }
+
+ rewriter.eraseOp(funcOp);
+ return success();
+ }
+};
+
+struct CallOpConversion : public OpConversionPattern<IREE::Util::CallOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::CallOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ // Create a new call that takes the expanded input operands and returns the
+ // expanded output results. We can't directly replace the original call as
+ // the result counts differ.
+ struct Result {
+ size_t originalIndex;
+ size_t newIndex;
+ Type newType;
+ };
+ SmallVector<Result> resultMap;
+ bool anyFailed = false;
+ auto callOp = op.cloneAndExpand(
+ [&](unsigned i, Value operand, SmallVectorImpl<Value> &newOperands) {
+ auto adaptorOperand = adaptor.getOperands()[i];
+ expandResourceOperand(op.getLoc(), adaptorOperand, newOperands,
+ rewriter);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ size_t newIndex = newTypes.size();
+ if (failed(getTypeConverter()->convertType(type, newTypes)))
+ anyFailed = true;
+ resultMap.push_back(Result{i, newIndex, newTypes[newIndex]});
+ },
+ rewriter);
+ if (anyFailed) {
+ return rewriter.notifyMatchFailure(op, "unable to convert result types");
+ }
+
+ // Tie all resource results together so we end up with 1:1 results with the
+ // original op.
+ SmallVector<Value> results;
+ for (auto result : resultMap) {
+ if (llvm::isa<IREE::Stream::ResourceType>(result.newType)) {
+ auto oldType = op.getResult(result.originalIndex).getType();
+ auto resource = callOp.getResult(result.newIndex + 0);
+ auto resourceSize = callOp.getResult(result.newIndex + 1);
+ results.push_back(rewriter
+ .create<mlir::UnrealizedConversionCastOp>(
+ op.getLoc(), TypeRange{oldType},
+ ValueRange{resource, resourceSize})
+ .getResult(0));
+ } else {
+ results.push_back(callOp.getResult(result.newIndex));
+ }
+ }
+ rewriter.replaceOp(op, results);
+
+ return success();
+ }
+};
+
+struct ReturnOpConversion : public OpConversionPattern<IREE::Util::ReturnOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::ReturnOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ // Expand any resource operands to resource + size.
+ auto expandedOperands =
+ expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
+ rewriter.replaceOpWithNewOp<IREE::Util::ReturnOp>(op, expandedOperands);
+ return success();
+ }
+};
+
+//===----------------------------------------------------------------------===//
// Globals
//===----------------------------------------------------------------------===//
@@ -223,6 +339,10 @@
void populateUtilToStreamConversionPatterns(MLIRContext *context,
TypeConverter &typeConverter,
RewritePatternSet &patterns) {
+ patterns
+ .insert<FuncOpSignatureConversion, CallOpConversion, ReturnOpConversion>(
+ typeConverter, context);
+
auto expansionState = std::make_shared<GlobalExpansionState>();
// TODO(#7432): add indirect global expansion support to streams.
patterns
@@ -259,8 +379,15 @@
return success();
});
- conversionTarget
- .addLegalOp<IREE::Util::InitializerOp, IREE::Util::ReturnOp>();
+ conversionTarget.addLegalOp<IREE::Util::InitializerOp>();
+ conversionTarget.addDynamicallyLegalOp<IREE::Util::FuncOp>(
+ [&](IREE::Util::FuncOp op) {
+ return typeConverter.isSignatureLegal(op.getFunctionType()) &&
+ typeConverter.isLegal(&op.getBody());
+ });
+ addGenericLegalOp<IREE::Util::CallOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<IREE::Util::ReturnOp>(conversionTarget, typeConverter);
+
conversionTarget.addDynamicallyLegalOp<IREE::Util::GlobalOp>(
[&](IREE::Util::GlobalOp op) {
return typeConverter.isLegal(op.getType()) &&
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/BUILD.bazel
index 888725a..3ed0e2c 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/BUILD.bazel
@@ -18,6 +18,7 @@
[
"compiler_hints.mlir",
"global_ops.mlir",
+ "structural_ops.mlir",
],
include = ["*.mlir"],
),
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/CMakeLists.txt
index 3223597..43ad040 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/CMakeLists.txt
@@ -16,6 +16,7 @@
SRCS
"compiler_hints.mlir"
"global_ops.mlir"
+ "structural_ops.mlir"
TOOLS
FileCheck
iree-opt
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
index d4ea662..f12a2ad 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
@@ -1,11 +1,11 @@
// RUN: iree-opt --split-input-file --iree-stream-conversion %s | FileCheck %s
// CHECK-LABEL: @optimizationBarrier
-func.func @optimizationBarrier(%arg0: tensor<i32>) -> tensor<i32> {
+util.func public @optimizationBarrier(%arg0: tensor<i32>) -> tensor<i32> {
// CHECK: stream.async.transfer
// CHECK: %[[RESOURCE:.*]] = util.optimization_barrier %0
// CHECK: %[[SIZE:.*]] = stream.resource.size %1 : !stream.resource<*>
- // CHECK: return %[[RESOURCE]], %[[SIZE]] : !stream.resource<*>, index
+ // CHECK: util.return %[[RESOURCE]], %[[SIZE]] : !stream.resource<*>, index
%0 = util.optimization_barrier %arg0 : tensor<i32>
- return %0 : tensor<i32>
+ util.return %0 : tensor<i32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/global_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/global_ops.mlir
index 105a513..34b8bba 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/global_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/global_ops.mlir
@@ -4,7 +4,7 @@
// CHECK: util.global public mutable @var_i32__size : index
util.global public mutable @var_i32 : tensor<i32>
// CHECK-LABEL: @mutableGlobal
-func.func @mutableGlobal() {
+util.func public @mutableGlobal() {
// CHECK-DAG: %[[VAR:.+]] = util.global.load @var_i32 : !stream.resource<variable>
// CHECK-DAG: %[[SIZE:.+]] = util.global.load @var_i32__size : index
// CHECK: %[[LOAD_T:.+]] = stream.async.transfer %[[VAR]] : !stream.resource<variable>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
@@ -13,18 +13,18 @@
// CHECK-DAG: util.global.store %[[STORE_T]], @var_i32 : !stream.resource<variable>
// CHECK-DAG: util.global.store %[[SIZE]], @var_i32__size : index
util.global.store %0, @var_i32 : tensor<i32>
- return
+ util.return
}
// -----
// TODO(#7432): add indirect global expansion support to streams.
// util.global public mutable @var_indirect : tensor<i32>
-// func.func @mutableGlobalIndirect() {
+// util.func public @mutableGlobalIndirect() {
// %0 = util.global.address @var_indirect : !util.ptr<tensor<i32>>
// %1 = util.global.load.indirect %0 : !util.ptr<tensor<i32>> -> tensor<i32>
// util.global.store.indirect %1, %0 : tensor<i32> -> !util.ptr<tensor<i32>>
-// return
+// util.return
// }
// -----
@@ -38,14 +38,14 @@
// CHECK-DAG: util.global.store %[[SIZE]], @var_with_tensor_initializer__size : index
util.global public mutable @var_with_tensor_initializer = dense<0.000000e+00> : tensor<f32>
// CHECK-LABEL: @initializedGlobal
-func.func @initializedGlobal() {
+util.func public @initializedGlobal() {
// CHECK-DAG: = util.global.load @var_with_tensor_initializer : !stream.resource<variable>
// CHECK-DAG: = util.global.load @var_with_tensor_initializer__size : index
%0 = util.global.load @var_with_tensor_initializer : tensor<f32>
// CHECK-DAG: util.global.store %{{.+}}, @var_with_tensor_initializer : !stream.resource<variable>
// CHECK-DAG: util.global.store %{{.+}}, @var_with_tensor_initializer__size : index
util.global.store %0, @var_with_tensor_initializer : tensor<f32>
- return
+ util.return
}
// -----
@@ -59,14 +59,14 @@
// CHECK-DAG: util.global.store %[[SIZE]], @var_with_tensor_uninitialized__size : index
util.global private mutable @var_with_tensor_uninitialized = #util.uninitialized : tensor<4xf32>
// CHECK-LABEL: @uninitializedGlobalTensor
-func.func @uninitializedGlobalTensor() {
+util.func public @uninitializedGlobalTensor() {
// CHECK-DAG: = util.global.load @var_with_tensor_uninitialized : !stream.resource<variable>
// CHECK-DAG: = util.global.load @var_with_tensor_uninitialized__size : index
%0 = util.global.load @var_with_tensor_uninitialized : tensor<4xf32>
// CHECK-DAG: util.global.store %{{.+}}, @var_with_tensor_uninitialized : !stream.resource<variable>
// CHECK-DAG: util.global.store %{{.+}}, @var_with_tensor_uninitialized__size : index
util.global.store %0, @var_with_tensor_uninitialized : tensor<4xf32>
- return
+ util.return
}
// -----
@@ -78,7 +78,7 @@
// CHECK-DAG: util.global public mutable @var_with_buffer_view_store__size : index
util.global public mutable @var_with_buffer_view_store : tensor<?x4xf32>
// CHECK-LABEL: @globalStoreFromExternal
-func.func @globalStoreFromExternal(%arg0: !hal.buffer_view) {
+util.func public @globalStoreFromExternal(%arg0: !hal.buffer_view) {
// CHECK: %[[DIM0:.+]] = hal.buffer_view.dim
%dim0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
// CHECK: %[[SIZE:.+]] = stream.tensor.sizeof tensor<?x4xf32>{%[[DIM0]]} : index
@@ -89,7 +89,7 @@
// CHECK: util.global.store %[[VAR]], @var_with_buffer_view_store : !stream.resource<variable>
// CHECK: util.global.store %[[SIZE]], @var_with_buffer_view_store__size : index
util.global.store %0, @var_with_buffer_view_store : tensor<?x4xf32>
- return
+ util.return
}
// -----
@@ -99,9 +99,9 @@
// TODO(#7432): add indirect global expansion support to streams.
// util.global public mutable @var_indirect_with_buffer_view_store : tensor<i32>
-// func.func @globalStoreFromExternalIndirect(%arg0: !hal.buffer_view) {
+// util.func public @globalStoreFromExternalIndirect(%arg0: !hal.buffer_view) {
// %0 = util.global.address @var_indirect_with_buffer_view_store : !util.ptr<tensor<i32>>
// %1 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<i32>
// util.global.store.indirect %1, %0 : tensor<i32> -> !util.ptr<tensor<i32>>
-// return
+// util.return
// }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/structural_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/structural_ops.mlir
new file mode 100644
index 0000000..c66e9a3
--- /dev/null
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/structural_ops.mlir
@@ -0,0 +1,21 @@
+// RUN: iree-opt --split-input-file --iree-stream-conversion %s | FileCheck %s
+
+// CHECK-LABEL: @functionExpansion
+// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[ARG0_SIZE:.+]]: index,
+// CHECK-SAME: %[[ARG1:.+]]: i1,
+// CHECK-SAME: %[[ARG2:.+]]: !stream.resource<*>, %[[ARG2_SIZE:.+]]: index)
+// CHECK-SAME: -> (!stream.resource<*>, index, i1, !stream.resource<*>, index)
+util.func private @functionExpansion(%arg0: tensor<4x?xf32>, %arg1: i1, %arg2: tensor<i32>)
+ -> (tensor<4x?xf32>, i1, tensor<i32>) {
+ // CHECK-NEXT: %[[RET:.+]]:5 = util.call @callee(%[[ARG0]], %[[ARG0_SIZE]], %[[ARG1]], %[[ARG2]], %[[ARG2_SIZE]])
+ // CHECK-SAME: : (!stream.resource<*>, index, i1, !stream.resource<*>, index) -> (!stream.resource<*>, index, i1, !stream.resource<*>, index)
+ %0:3 = util.call @callee(%arg0, %arg1, %arg2) : (tensor<4x?xf32>, i1, tensor<i32>) -> (tensor<4x?xf32>, i1, tensor<i32>)
+ // CHECK: util.return %[[RET]]#0, %[[RET]]#1, %[[RET]]#2, %[[RET]]#3, %[[RET]]#4 : !stream.resource<*>, index, i1, !stream.resource<*>, index
+ util.return %0#0, %0#1, %0#2 : tensor<4x?xf32>, i1, tensor<i32>
+}
+
+// CHECK: util.func private @callee
+util.func private @callee(%arg0: tensor<4x?xf32>, %arg1: i1, %arg2: tensor<i32>)
+ -> (tensor<4x?xf32>, i1, tensor<i32>) {
+ util.return %arg0, %arg1, %arg2 : tensor<4x?xf32>, i1, tensor<i32>
+}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_folding.mlir
index de8a0f1..1438588 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_folding.mlir
@@ -4,7 +4,7 @@
// We likely want to clone instead to reduce lifetime of the splats.
// CHECK-LABEL: @SinkSplatsToConsumers
-func.func @SinkSplatsToConsumers(
+util.func private @SinkSplatsToConsumers(
%arg0: i1, %arg1: i1,
%arg2: !stream.resource<*>,
%arg3: !stream.resource<*>,
@@ -37,13 +37,13 @@
cf.br ^bb4(%3 : !stream.resource<*>)
// CHECK: ^bb4(
^bb4(%arg6: !stream.resource<*>):
- return %arg6 : !stream.resource<*>
+ util.return %arg6 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @SinkSplatsToCommonAncestorOfConsumersInRegions
-func.func @SinkSplatsToCommonAncestorOfConsumersInRegions(%arg0: i1) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @SinkSplatsToCommonAncestorOfConsumersInRegions(%arg0: i1) -> (!stream.resource<*>, !stream.resource<*>) {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -81,13 +81,13 @@
%6 = stream.async.dispatch @executable::@dispatch1[%c1, %c2, %c3](%0[%c0 to %c100 for %c100], %1[%c0 to %c100 for %c100]) : (!stream.resource<*>{%c100}, !stream.resource<*>{%c100}) -> !stream.resource<*>{%c100}
scf.yield %6 : !stream.resource<*>
}
- return %4, %3 : !stream.resource<*>, !stream.resource<*>
+ util.return %4, %3 : !stream.resource<*>, !stream.resource<*>
}
// -----
// CHECK-LABEL: @SplatAlreadyAtSinkLocation
-func.func @SplatAlreadyAtSinkLocation(
+util.func private @SplatAlreadyAtSinkLocation(
%arg0: i1, %arg1: i1,
%arg2: !stream.resource<*>,
%arg3: !stream.resource<*>,
@@ -120,13 +120,13 @@
cf.br ^bb3(%3 : !stream.resource<*>)
// CHECK: ^bb3(
^bb3(%arg6: !stream.resource<*>):
- return %arg6 : !stream.resource<*>
+ util.return %arg6 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @PropagateClonableOps
-func.func @PropagateClonableOps(%arg0: index) -> !stream.resource<*> {
+util.func private @PropagateClonableOps(%arg0: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -134,38 +134,38 @@
%0 = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%arg0}
// CHECK-NOT: stream.async.clone
%1 = stream.async.clone %0 : !stream.resource<*>{%arg0} -> !stream.resource<*>{%arg0}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @ConvertSplatConstantsIntoSplats
-func.func @ConvertSplatConstantsIntoSplats(%arg0: index) -> (!stream.resource<transient>, !stream.resource<transient>) {
+util.func private @ConvertSplatConstantsIntoSplats(%arg0: index) -> (!stream.resource<transient>, !stream.resource<transient>) {
// CHECK: %[[CST:.+]] = arith.constant 3 : i32
// CHECK: = stream.async.constant : !stream.resource<transient>{%arg0} = dense<[1, 2, 3, 4, 5, 6, 7, 8]> : tensor<8xi32>
%0 = stream.async.constant : !stream.resource<transient>{%arg0} = dense<[1, 2, 3, 4, 5, 6, 7, 8]> : tensor<8xi32>
// CHECK-NOT: = stream.async.constant : !stream.resource<transient>{%arg0} = dense<[3]> : tensor<8xi32>
// CHECK: = stream.async.splat %[[CST]] : i32 -> !stream.resource<transient>{%arg0}
%1 = stream.async.constant : !stream.resource<transient>{%arg0} = dense<3> : tensor<8xi32>
- return %0, %1 : !stream.resource<transient>, !stream.resource<transient>
+ util.return %0, %1 : !stream.resource<transient>, !stream.resource<transient>
}
// -----
// CHECK-LABEL: @FoldAsyncSliceOp
-func.func @FoldAsyncSliceOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @FoldAsyncSliceOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.async.slice
%0 = stream.async.slice %arg0[%c0 to %arg1] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%arg1}
- // CHECK: return %arg0
- return %0 : !stream.resource<*>
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @PropagateSplatsThroughSlices
-func.func @PropagateSplatsThroughSlices(%arg0: index) -> !stream.resource<*> {
+util.func private @PropagateSplatsThroughSlices(%arg0: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -173,38 +173,38 @@
%0 = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%arg0}
// CHECK-NOT: stream.async.slice
%1 = stream.async.slice %0[%c0 to %c128] : !stream.resource<*>{%arg0} -> !stream.resource<*>{%c128}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @FlattenFullFillToSplat
-func.func @FlattenFullFillToSplat(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
+util.func private @FlattenFullFillToSplat(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK: %[[T:.+]] = stream.async.splat %arg2 : i32 -> !stream.resource<*>{%arg1}
%0 = stream.async.fill %arg2, %arg0[%c0 to %arg1 for %arg1] : i32 -> %arg0 as !stream.resource<*>{%arg1}
- // CHECK: return %[[T]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @ElideRedundantFill
-func.func @ElideRedundantFill(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
+util.func private @ElideRedundantFill(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK: %[[T:.+]] = stream.async.splat %arg2 : i32 -> !stream.resource<*>{%arg1}
%0 = stream.async.splat %arg2 : i32 -> !stream.resource<*>{%arg1}
// CHECK-NOT: stream.async.fill
%1 = stream.async.fill %arg2, %0[%c0 to %arg1 for %arg1] : i32 -> %0 as !stream.resource<*>{%arg1}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @ElideRedundantFillBitPatterns
-func.func @ElideRedundantFillBitPatterns(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @ElideRedundantFillBitPatterns(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK: %[[CD_I8:.+]] = arith.constant -51 : i8
%cCDCD_i16 = arith.constant 0xCDCD : i16
@@ -213,14 +213,14 @@
%0 = stream.async.splat %cCDCDCDCD_i32 : i32 -> !stream.resource<*>{%arg1}
// CHECK-NOT: stream.async.fill
%1 = stream.async.fill %cCDCD_i16, %0[%c0 to %arg1 for %arg1] : i16 -> %0 as !stream.resource<*>{%arg1}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @CoalesceAdjacentFills
-func.func @CoalesceAdjacentFills(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @CoalesceAdjacentFills(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c4 = arith.constant 4 : index
%c8 = arith.constant 8 : index
%c12 = arith.constant 12 : index
@@ -235,25 +235,25 @@
%2 = stream.async.fill %c0_i8, %1[%c12 to %c16 for %c4] : i8 -> %1 as !stream.resource<*>{%arg1}
// CHECK: %[[FILL_1:.+]] = stream.async.fill %c1_i8, %[[FILL_0]][%c16 to %c20 for %c4] : i8 -> %[[FILL_0]] as !stream.resource<*>{%arg1}
%3 = stream.async.fill %c1_i8, %2[%c16 to %c20 for %c4] : i8 -> %2 as !stream.resource<*>{%arg1}
- // CHECK: return %[[FILL_1]]
- return %3 : !stream.resource<*>
+ // CHECK: util.return %[[FILL_1]]
+ util.return %3 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @FoldAsyncUpdateOp
-func.func @FoldAsyncUpdateOp(%arg0: !stream.resource<*>, %arg1: !stream.resource<*>, %arg2: index) -> !stream.resource<*> {
+util.func private @FoldAsyncUpdateOp(%arg0: !stream.resource<*>, %arg1: !stream.resource<*>, %arg2: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.async.update
%0 = stream.async.update %arg1, %arg0[%c0 to %arg2] : !stream.resource<*>{%arg2} -> %arg0 as !stream.resource<*>{%arg2}
- // CHECK: return %arg1
- return %0 : !stream.resource<*>
+ // CHECK: util.return %arg1
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @CombineSplatUpdateFromToFill
-func.func @CombineSplatUpdateFromToFill(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @CombineSplatUpdateFromToFill(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -261,28 +261,28 @@
%0 = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%c128}
// CHECK: %[[T:.+]] = stream.async.fill %c123_i32, %arg0[%c0 to %c128 for %c128] : i32 -> %arg0 as !stream.resource<*>{%arg1}
%1 = stream.async.update %0, %arg0[%c0 to %c128] : !stream.resource<*>{%c128} -> %arg0 as !stream.resource<*>{%arg1}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @CombineSliceUpdateFromToCopy
-func.func @CombineSliceUpdateFromToCopy(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
+util.func private @CombineSliceUpdateFromToCopy(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK-NOT: stream.async.slice
%0 = stream.async.slice %arg0[%c0 to %c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c128}
// CHECK: %[[T:.+]] = stream.async.copy %arg0[%c0 to %c128], %arg2[%c0 to %c128], %c128 : !stream.resource<*>{%arg1} -> %arg2 as !stream.resource<*>{%arg3}
%1 = stream.async.update %0, %arg2[%c0 to %c128] : !stream.resource<*>{%c128} -> %arg2 as !stream.resource<*>{%arg3}
- // CHECK: return %[[T]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[T]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @AsyncCopyFullSourceToUpdate
-func.func @AsyncCopyFullSourceToUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func private @AsyncCopyFullSourceToUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
@@ -295,69 +295,69 @@
// CHECK: = stream.async.copy %arg2[%c16 to %arg3], %arg0[%c0 to %arg3], %c8 : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
%1 = stream.async.copy %arg2[%c16 to %arg3], %arg0[%c0 to %arg3], %c8 : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
- return %0, %1 : !stream.resource<*>, !stream.resource<*>
+ util.return %0, %1 : !stream.resource<*>, !stream.resource<*>
}
// -----
// CHECK-LABEL: @FoldAsyncTransferOp
-func.func @FoldAsyncTransferOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
+util.func private @FoldAsyncTransferOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
// CHECK-NOT: stream.async.transfer
%0 = stream.async.transfer %arg0 : !stream.resource<transient>{%arg1} -> !stream.resource<staging>{%arg1}
%1 = stream.async.transfer %0 : !stream.resource<staging>{%arg1} -> !stream.resource<transient>{%arg1}
- return %1 : !stream.resource<transient>
+ util.return %1 : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @RedundantTransferElision
-func.func @RedundantTransferElision(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
+util.func private @RedundantTransferElision(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.resource<transient> {
// CHECK-NOT: stream.async.transfer
%0 = stream.async.transfer %arg0 : !stream.resource<transient>{%arg1} -> !stream.resource<transient>{%arg1}
- return %0 : !stream.resource<transient>
+ util.return %0 : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @IntermediateTransferElision
// CHECK-SAME: (%[[SOURCE:.+]]: !stream.resource<constant>, %[[SIZE:.+]]: index)
-func.func @IntermediateTransferElision(%source: !stream.resource<constant>, %size: index) -> !stream.resource<external> {
+util.func private @IntermediateTransferElision(%source: !stream.resource<constant>, %size: index) -> !stream.resource<external> {
// CHECK: %[[TRANSFER:.+]] = stream.async.transfer %[[SOURCE]] : !stream.resource<constant>{%[[SIZE]]} -> !stream.resource<external>{%[[SIZE]]}
%transfer0 = stream.async.transfer %source : !stream.resource<constant>{%size} -> !stream.resource<staging>{%size}
// CHECK-NOT: stream.async.transfer
%transfer1 = stream.async.transfer %transfer0 : !stream.resource<staging>{%size} -> !stream.resource<external>{%size}
- // CHECK-NEXT: return %[[TRANSFER]]
- return %transfer1 : !stream.resource<external>
+ // CHECK-NEXT: util.return %[[TRANSFER]]
+ util.return %transfer1 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @FoldAsyncLoadBitcast
-func.func @FoldAsyncLoadBitcast(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
+util.func private @FoldAsyncLoadBitcast(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: %[[F32:.+]] = stream.async.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> f32
%0 = stream.async.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> i32
// CHECK-NOT: arith.bitcast
%1 = arith.bitcast %0 : i32 to f32
- // CHECK: return %[[F32]]
- return %1 : f32
+ // CHECK: util.return %[[F32]]
+ util.return %1 : f32
}
// -----
// CHECK-LABEL: @FoldAsyncStoreBitcast
-func.func @FoldAsyncStoreBitcast(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
+util.func private @FoldAsyncStoreBitcast(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
%0 = arith.bitcast %arg2 : f32 to i32
// CHECK: = stream.async.store %arg2, %arg0[%c0] : f32 -> %arg0 as !stream.resource<staging>{%arg1}
%1 = stream.async.store %0, %arg0[%c0] : i32 -> %arg0 as !stream.resource<staging>{%arg1}
- return %1 : !stream.resource<staging>
+ util.return %1 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @ElideImmediateAsyncExecuteWaits
-func.func @ElideImmediateAsyncExecuteWaits(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @ElideImmediateAsyncExecuteWaits(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NOT: stream.timepoint.immediate
@@ -369,13 +369,13 @@
// CHECK: stream.yield
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ChainAsyncExecuteWaits
-func.func @ChainAsyncExecuteWaits(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @ChainAsyncExecuteWaits(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NOT: stream.timepoint.await
@@ -387,13 +387,13 @@
// CHECK: stream.yield
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
+ util.return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @CloneCapturedAsyncExecuteSubviewOps
-func.func @CloneCapturedAsyncExecuteSubviewOps(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @CloneCapturedAsyncExecuteSubviewOps(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -407,26 +407,26 @@
// CHECK: stream.yield
stream.yield %1 : !stream.resource<*>{%c128}
} => !stream.timepoint
- return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
+ util.return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideNoOpAsyncExecuteOp
-func.func @ElideNoOpAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @ElideNoOpAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
// CHECK-NOT: stream.async.execute
%1:2 = stream.async.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<*>{%arg1}) -> %arg0{%arg1} {
stream.yield %arg3 : !stream.resource<*>{%arg1}
} => !stream.timepoint
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
- // CHECK: return %arg0, %[[IMM]]
- return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
+ // CHECK: util.return %arg0, %[[IMM]]
+ util.return %1#0, %1#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @TieRegionResultsAsyncExecuteOp
-func.func @TieRegionResultsAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @TieRegionResultsAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: = stream.async.execute with(%arg0 as %arg2: !stream.resource<*>{%arg1}) -> %arg0{%arg1}
@@ -436,13 +436,13 @@
// CHECK: stream.yield %[[T]]
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideUnusedAsyncExecuteOp
-func.func @ElideUnusedAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) {
+util.func private @ElideUnusedAsyncExecuteOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-NOT: stream.async.execute
@@ -450,13 +450,13 @@
%1 = stream.async.dispatch @executable::@dispatch0[%c1, %c1, %c1](%arg3[%c0 to %arg1 for %arg1]) : (!stream.resource<*>{%arg1}) -> !stream.resource<*>{%arg1}
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return
+ util.return
}
// -----
// CHECK-LABEL: @TieRegionResultsAsyncConcurrentOp
-func.func @TieRegionResultsAsyncConcurrentOp(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @TieRegionResultsAsyncConcurrentOp(%arg0: !stream.resource<*>, %arg1: index) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: = stream.async.execute with(%arg0 as %arg2: !stream.resource<*>{%arg1}) -> %arg0{%arg1}
@@ -471,13 +471,13 @@
// CHECK: stream.yield %[[EXEC_T]]
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideUnusedAsyncConcurrentOp
-func.func @ElideUnusedAsyncConcurrentOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @ElideUnusedAsyncConcurrentOp(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: stream.async.execute
@@ -492,5 +492,5 @@
}
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_ops.mlir
index f136ea8..0c6085a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/async_ops.mlir
@@ -1,81 +1,81 @@
// RUN: iree-opt --split-input-file %s --verify-diagnostics | FileCheck %s
// CHECK-LABEL: @asyncAlloca
-func.func @asyncAlloca(%arg0: index) -> !stream.resource<transient> {
+util.func private @asyncAlloca(%arg0: index) -> !stream.resource<transient> {
// CHECK: = stream.async.alloca : !stream.resource<transient>{%arg0}
%0 = stream.async.alloca : !stream.resource<transient>{%arg0}
- return %0 : !stream.resource<transient>
+ util.return %0 : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @asyncConstant
-func.func @asyncConstant(%arg0: index) -> !stream.resource<transient> {
+util.func private @asyncConstant(%arg0: index) -> !stream.resource<transient> {
// CHECK: = stream.async.constant : !stream.resource<transient>{%arg0} = dense<3> : tensor<8xi32>
%0 = stream.async.constant : !stream.resource<transient>{%arg0} = dense<3> : tensor<8xi32>
- return %0 : !stream.resource<transient>
+ util.return %0 : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @asyncSplat
-func.func @asyncSplat(%arg0: index, %arg1: i32) -> !stream.resource<*> {
+util.func private @asyncSplat(%arg0: index, %arg1: i32) -> !stream.resource<*> {
// CHECK: = stream.async.splat %arg1 : i32 -> !stream.resource<*>{%arg0}
%0 = stream.async.splat %arg1 : i32 -> !stream.resource<*>{%arg0}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncClone
-func.func @asyncClone(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncClone(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
// CHECK: = stream.async.clone %arg0 : !stream.resource<*>{%arg1} -> !stream.resource<*>{%arg1}
%0 = stream.async.clone %arg0 : !stream.resource<*>{%arg1} -> !stream.resource<*>{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncSlice
-func.func @asyncSlice(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncSlice(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.async.slice %arg0[%c0 to %c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c128}
%0 = stream.async.slice %arg0[%c0 to %c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c128}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncFill
-func.func @asyncFill(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
+util.func private @asyncFill(%arg0: !stream.resource<*>, %arg1: index, %arg2: i32) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.async.fill %arg2, %arg0[%c0 to %c128 for %c128] : i32 -> %arg0 as !stream.resource<*>{%arg1}
%0 = stream.async.fill %arg2, %arg0[%c0 to %c128 for %c128] : i32 -> %arg0 as !stream.resource<*>{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncUpdate
-func.func @asyncUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
+util.func private @asyncUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.async.update %arg2, %arg0[%c0 to %c128] : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
%0 = stream.async.update %arg2, %arg0[%c0 to %c128] : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncCopy
-func.func @asyncCopy(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
+util.func private @asyncCopy(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.async.copy %arg2[%c0 to %c128], %arg0[%c0 to %c128], %c128 : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
%0 = stream.async.copy %arg2[%c0 to %c128], %arg0[%c0 to %c128], %c128 : !stream.resource<*>{%arg3} -> %arg0 as !stream.resource<*>{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -83,7 +83,7 @@
// This covers all_gather, all_reduce, and reduce_scatter variants.
// CHECK-LABEL: @asyncCollectiveAllGather
-func.func @asyncCollectiveAllGather(
+util.func private @asyncCollectiveAllGather(
// CHECK-SAME: %[[CHANNEL:.+]]: !stream.channel,
%channel: !stream.channel,
// CHECK-SAME: %[[SEND:[a-z0-9]+]]: !stream.resource<*>, %[[SEND_SIZE:[a-z0-9]+]]: index,
@@ -103,7 +103,7 @@
%recv[%c0 to %recv_size for %recv_size] :
// CHECK-SAME: !stream.resource<*>{%[[SEND_SIZE]]} -> %[[RECV]] as !stream.resource<*>{%[[RECV_SIZE]]}
!stream.resource<*>{%send_size} -> %recv as !stream.resource<*>{%recv_size}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -111,7 +111,7 @@
// This covers broadcast and reduce variants.
// CHECK-LABEL: @asyncCollectiveBroadcast
-func.func @asyncCollectiveBroadcast(
+util.func private @asyncCollectiveBroadcast(
// CHECK-SAME: %[[CHANNEL:.+]]: !stream.channel,
%channel: !stream.channel,
// CHECK-SAME: %[[RANK:[a-z0-9]+]]: i32,
@@ -133,42 +133,42 @@
%recv[%c0 to %recv_size for %recv_size] :
// CHECK-SAME: !stream.resource<*>{%[[SEND_SIZE]]} -> %[[RECV]] as !stream.resource<*>{%[[RECV_SIZE]]}
!stream.resource<*>{%send_size} -> %recv as !stream.resource<*>{%recv_size}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncTransfer
-func.func @asyncTransfer(%arg0: !stream.resource<constant>, %arg1: index) -> !stream.resource<staging> {
+util.func private @asyncTransfer(%arg0: !stream.resource<constant>, %arg1: index) -> !stream.resource<staging> {
// CHECK: = stream.async.transfer %arg0 : !stream.resource<constant>{%arg1} -> !stream.resource<staging>{%arg1}
%0 = stream.async.transfer %arg0 : !stream.resource<constant>{%arg1} -> !stream.resource<staging>{%arg1}
- return %0 : !stream.resource<staging>
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @asyncLoad
-func.func @asyncLoad(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
+util.func private @asyncLoad(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: = stream.async.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> f32
%0 = stream.async.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> f32
- return %0 : f32
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @asyncStore
-func.func @asyncStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
+util.func private @asyncStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: = stream.async.store %arg2, %arg0[%c0] : f32 -> %arg0 as !stream.resource<staging>{%arg1}
%0 = stream.async.store %arg2, %arg0[%c0] : f32 -> %arg0 as !stream.resource<staging>{%arg1}
- return %0 : !stream.resource<staging>
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @asyncDispatch
-func.func @asyncDispatch(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncDispatch(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -176,17 +176,17 @@
%c4 = arith.constant 4 : index
// CHECK: = stream.async.dispatch @executable::@dispatch[%c1, %c2, %c3](%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
%0 = stream.async.dispatch @executable::@dispatch[%c1, %c2, %c3](%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncDispatchNoInputs
-func.func @asyncDispatchNoInputs(%arg0: index) -> !stream.resource<*> {
+util.func private @asyncDispatchNoInputs(%arg0: index) -> !stream.resource<*> {
%c1 = arith.constant 1 : index
// CHECK: = stream.async.dispatch @executable::@dispatch[%c1]() : () -> !stream.resource<*>{%arg0}
%0 = stream.async.dispatch @executable::@dispatch[%c1]() : () -> !stream.resource<*>{%arg0}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -196,21 +196,21 @@
stream.return %arg0, %arg1, %arg0 : index, index, index
}
builtin.module {
- func.func @dispatch() {
- return
+ util.func private @dispatch() {
+ util.return
}
}
}
// CHECK-LABEL: @asyncDispatchWithWorkgroupCount
-func.func @asyncDispatchWithWorkgroupCount(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncDispatchWithWorkgroupCount(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
// CHECK: = stream.async.dispatch @executable::@dispatch[%c1, %c2](%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
%0 = stream.async.dispatch @executable::@dispatch[%c1, %c2](%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -220,31 +220,31 @@
stream.return %arg0, %arg0, %arg0 : index, index, index
}
builtin.module {
- func.func @dispatch() {
- return
+ util.func private @dispatch() {
+ util.return
}
}
}
-func.func @asyncDispatchWithInvalidWorkload(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncDispatchWithInvalidWorkload(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
// expected-error @+1 {{op workload mismatch; entry point expects 1 arguments but dispatch provides 2}}
%0 = stream.async.dispatch @executable::@dispatch[%c1, %c2](%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncDispatchNoWorkload
-func.func @asyncDispatchNoWorkload(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncDispatchNoWorkload(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
// CHECK: = stream.async.dispatch @executable::@dispatch(%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
%0 = stream.async.dispatch @executable::@dispatch(%arg0[%c0 to %arg1 for %arg1], %c4) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -253,17 +253,17 @@
// CHECK-LABEL: @asyncCall
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>, %[[SIZE0:.+]]: index)
-func.func @asyncCall(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @asyncCall(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK: = stream.async.call @asyncExtern(%[[ARG0]][%c0 to %[[SIZE0]] for %[[SIZE0]]], %[[SIZE0]]) : (!stream.resource<*>{%[[SIZE0]]}, index) -> %[[ARG0]]{%[[SIZE0]]}
%call = stream.async.call @asyncExtern(%arg0[%c0 to %arg1 for %arg1], %arg1) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
- return %call : !stream.resource<*>
+ util.return %call : !stream.resource<*>
}
// -----
// CHECK-LABEL: @asyncExecute
-func.func @asyncExecute(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @asyncExecute(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.resource<*>, !stream.timepoint) {
// CHECK: = stream.async.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<*>{%arg1}) -> %arg0{%arg1} {
%0:2 = stream.async.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<*>{%arg1}) -> %arg0 as !stream.resource<*>{%arg1} {
// CHECK: %[[W:.+]] = stream.async.concurrent with(%arg3 as %arg4: !stream.resource<*>{%arg1}) -> %arg3{%arg1} {
@@ -274,13 +274,13 @@
// CHECK: stream.yield %[[W]] : !stream.resource<*>{%arg1}
stream.yield %1 : !stream.resource<*>{%arg1}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @asyncExecuteNoCaptures
-func.func @asyncExecuteNoCaptures(%arg0: index, %arg1: i32) -> (!stream.resource<*>, !stream.timepoint) {
+util.func private @asyncExecuteNoCaptures(%arg0: index, %arg1: i32) -> (!stream.resource<*>, !stream.timepoint) {
// CHECK: = stream.async.execute with() -> !stream.resource<*>{%arg0} {
%0:2 = stream.async.execute with() -> !stream.resource<*>{%arg0} {
// CHECK: %[[T:.+]] = stream.async.splat
@@ -288,17 +288,17 @@
// CHECK: stream.yield %[[T]] : !stream.resource<*>{%arg0}
stream.yield %1 : !stream.resource<*>{%arg0}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// -----
// CHECK-LABEL: @asyncExecuteNoResults
-func.func @asyncExecuteNoResults(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.timepoint) {
+util.func private @asyncExecuteNoResults(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.timepoint) -> (!stream.timepoint) {
// CHECK: = stream.async.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<*>{%arg1}) {
%0:1 = stream.async.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<*>{%arg1}) {
// CHECK: stream.yield
stream.yield
} => !stream.timepoint
- return %0#0 : !stream.timepoint
+ util.return %0#0 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_folding.mlir
index 282f5a7..1acb552 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_folding.mlir
@@ -2,42 +2,42 @@
// CHECK-LABEL: @FoldChannelRankOp
// CHECK-SAME: (%[[RANK:.+]]: index)
-func.func @FoldChannelRankOp(%rank: index) -> index {
+util.func private @FoldChannelRankOp(%rank: index) -> index {
%channel = stream.channel.create rank(%rank) : !stream.channel
%queried_rank = stream.channel.rank %channel : index
- // CHECK: return %[[RANK]]
- return %queried_rank : index
+ // CHECK: util.return %[[RANK]]
+ util.return %queried_rank : index
}
// -----
// CHECK-LABEL: @NoFoldChannelRankOp
-func.func @NoFoldChannelRankOp() -> index {
+util.func private @NoFoldChannelRankOp() -> index {
%channel = stream.channel.create : !stream.channel
// CHECK: %[[RANK:.+]] = stream.channel.rank
%queried_rank = stream.channel.rank %channel : index
- // CHECK: return %[[RANK]]
- return %queried_rank : index
+ // CHECK: util.return %[[RANK]]
+ util.return %queried_rank : index
}
// -----
// CHECK-LABEL: @FoldChannelCountOp
// CHECK-SAME: (%[[COUNT:.+]]: index)
-func.func @FoldChannelCountOp(%count: index) -> index {
+util.func private @FoldChannelCountOp(%count: index) -> index {
%channel = stream.channel.create count(%count) : !stream.channel
%queried_count = stream.channel.count %channel : index
- // CHECK: return %[[COUNT]]
- return %queried_count : index
+ // CHECK: util.return %[[COUNT]]
+ util.return %queried_count : index
}
// -----
// CHECK-LABEL: @NoFoldChannelCountOp
-func.func @NoFoldChannelCountOp() -> index {
+util.func private @NoFoldChannelCountOp() -> index {
%channel = stream.channel.create : !stream.channel
// CHECK: %[[COUNT:.+]] = stream.channel.count
%queried_count = stream.channel.count %channel : index
- // CHECK: return %[[COUNT]]
- return %queried_count : index
+ // CHECK: util.return %[[COUNT]]
+ util.return %queried_count : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_ops.mlir
index 7c4d7c5..486a03f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/channel_ops.mlir
@@ -2,42 +2,42 @@
// CHECK-LABEL: @channel_create
// CHECK-SAME: (%[[RANK:.+]]: index, %[[COUNT:.+]]: index)
-func.func @channel_create(%rank: index, %count: index) {
+util.func private @channel_create(%rank: index, %count: index) {
// CHECK: %channel = stream.channel.create on(#hal.affinity.queue<[0, 1]>) rank(%[[RANK]]) count(%[[COUNT]]) : !stream.channel
%channel = stream.channel.create on(#hal.affinity.queue<[0, 1]>) rank(%rank) count(%count) : !stream.channel
- return
+ util.return
}
// -----
// CHECK-LABEL: @channel_split
// CHECK-SAME: (%[[BASE_CHANNEL:.+]]: !stream.channel)
-func.func @channel_split(%base_channel: !stream.channel) {
+util.func private @channel_split(%base_channel: !stream.channel) {
// CHECK-DAG: %[[COLOR:.+]] = arith.constant 100 : index
%color = arith.constant 100 : index
// CHECK-DAG: %[[KEY:.+]] = arith.constant 101 : index
%key = arith.constant 101 : index
// CHECK: %channel = stream.channel.split %[[BASE_CHANNEL]], %[[COLOR]], %[[KEY]] : !stream.channel -> !stream.channel
%split_channel = stream.channel.split %base_channel, %color, %key : !stream.channel -> !stream.channel
- return
+ util.return
}
// -----
// CHECK-LABEL: @channel_rank
// CHECK-SAME: (%[[CHANNEL:.+]]: !stream.channel)
-func.func @channel_rank(%channel: !stream.channel) -> index {
+util.func private @channel_rank(%channel: !stream.channel) -> index {
// CHECK: = stream.channel.rank %[[CHANNEL]] : index
%rank = stream.channel.rank %channel : index
- return %rank : index
+ util.return %rank : index
}
// -----
// CHECK-LABEL: @channel_count
// CHECK-SAME: (%[[CHANNEL:.+]]: !stream.channel)
-func.func @channel_count(%channel: !stream.channel) -> index {
+util.func private @channel_count(%channel: !stream.channel) -> index {
// CHECK: = stream.channel.count %[[CHANNEL]] : index
%count = stream.channel.count %channel : index
- return %count : index
+ util.return %count : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_folding.mlir
index 0d76525..5f8b27f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_folding.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --canonicalize %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @FoldSubviewsIntoCmdTOp
-func.func @FoldSubviewsIntoCmdTOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @FoldSubviewsIntoCmdTOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c1000 = arith.constant 1000 : index
@@ -19,13 +19,13 @@
// CHECK: stream.cmd.fill %c255_i32, %arg2[%c1064 for %c2000] : i32 -> !stream.resource<transient>{%arg1}
stream.cmd.fill %c255_i32, %arg2[%c1000 for %c2000] : i32 -> !stream.resource<transient>{%c3000}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
// CHECK-LABEL: @FoldSubviewsIntoCmdCopyOp
-func.func @FoldSubviewsIntoCmdCopyOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @FoldSubviewsIntoCmdCopyOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
@@ -39,13 +39,13 @@
// CHECK: stream.cmd.copy %arg2[%c1064], %arg2[%c2128], %c1000 : !stream.resource<transient>{%arg1} -> !stream.resource<transient>{%arg1}
stream.cmd.copy %arg2[%c1000], %arg3[%c2000], %c1000 : !stream.resource<transient>{%c3000} -> !stream.resource<transient>{%c4000}
} => !stream.timepoint
- return %2 : !stream.timepoint
+ util.return %2 : !stream.timepoint
}
// -----
// CHECK-LABEL: @FoldSubviewsIntoCmdDispatchOp
-func.func @FoldSubviewsIntoCmdDispatchOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @FoldSubviewsIntoCmdDispatchOp(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c64 = arith.constant 64 : index
@@ -65,13 +65,13 @@
wo %arg3[%c2000 for %c1000] : !stream.resource<transient>{%c4000}
}
} => !stream.timepoint
- return %2 : !stream.timepoint
+ util.return %2 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideImmediateCmdExecuteWaits
-func.func @ElideImmediateCmdExecuteWaits(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @ElideImmediateCmdExecuteWaits(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
@@ -79,13 +79,13 @@
%0 = stream.cmd.execute await(%imm) => with(%arg0 as %arg2: !stream.resource<transient>{%arg1}) {
stream.cmd.discard %arg2[%c0 for %arg1] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ChainCmdExecuteWaits
-func.func @ChainCmdExecuteWaits(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) -> !stream.timepoint {
+util.func private @ChainCmdExecuteWaits(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK-NOT: stream.timepoint.await
@@ -95,13 +95,13 @@
// CHECK: stream.cmd.discard
stream.cmd.discard %arg3[%c0 for %c128] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
// CHECK-LABEL: @CloneCapturedCmdExecuteSubviewOps
-func.func @CloneCapturedCmdExecuteSubviewOps(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @CloneCapturedCmdExecuteSubviewOps(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
@@ -115,30 +115,30 @@
// CHECK: stream.cmd.discard %arg2[%c1064 for %c2000] : !stream.resource<transient>{%arg1}
stream.cmd.discard %arg3[%c1000 for %c2000] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideNoOpCmdExecuteOp
-func.func @ElideNoOpCmdExecuteOp(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) -> !stream.timepoint {
+util.func private @ElideNoOpCmdExecuteOp(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) -> !stream.timepoint {
// CHECK-NOT: stream.cmd.execute
%0 = stream.cmd.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<transient>{%arg1}) {
} => !stream.timepoint
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
- // CHECK: return %[[IMM]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[IMM]]
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideUnusedCmdExecuteOp
-func.func @ElideUnusedCmdExecuteOp(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) {
+util.func private @ElideUnusedCmdExecuteOp(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.timepoint) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK-NOT: stream.cmd.execute
%0 = stream.cmd.execute await(%arg2) => with(%arg0 as %arg3: !stream.resource<transient>{%arg1}) {
stream.cmd.discard %arg3[%c0 for %c128] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_ops.mlir
index 9298c0f..d75df4c 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/cmd_ops.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --allow-unregistered-dialect %s | iree-opt --split-input-file --allow-unregistered-dialect | FileCheck %s
// CHECK-LABEL: @cmdMemoryControl
-func.func @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%0 = stream.cmd.execute with(%arg0 as %arg2: !stream.resource<transient>{%arg1}) {
@@ -12,13 +12,13 @@
// CHECK: stream.cmd.discard %arg2[%c0 for %c128] : !stream.resource<transient>{%arg1}
stream.cmd.discard %arg2[%c0 for %c128] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdFill
-func.func @cmdFill(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func private @cmdFill(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c255_i32 = arith.constant 255 : i32
@@ -26,27 +26,27 @@
// CHECK: stream.cmd.fill %c255_i32, %arg2[%c0 for %c128] : i32 -> !stream.resource<transient>{%arg1}
stream.cmd.fill %c255_i32, %arg2[%c0 for %c128] : i32 -> !stream.resource<transient>{%arg1}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdCopy
-func.func @cmdCopy(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index) -> !stream.timepoint {
+util.func private @cmdCopy(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%0 = stream.cmd.execute with(%arg0 as %arg4: !stream.resource<transient>{%arg1}, %arg2 as %arg5: !stream.resource<staging>{%arg3}) {
// CHECK: stream.cmd.copy %arg4[%c0], %arg5[%c0], %c128 : !stream.resource<transient>{%arg1} -> !stream.resource<staging>{%arg3}
stream.cmd.copy %arg4[%c0], %arg5[%c0], %c128 : !stream.resource<transient>{%arg1} -> !stream.resource<staging>{%arg3}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdCollective
// CHECK-SAME: %[[CHANNEL:[a-z0-9]+]]: !stream.channel
-func.func @cmdCollective(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<transient>, %arg3: index, %channel: !stream.channel) -> !stream.timepoint {
+util.func private @cmdCollective(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<transient>, %arg3: index, %channel: !stream.channel) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: stream.cmd.execute
@@ -91,13 +91,13 @@
}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdDispatch
-func.func @cmdDispatch(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<external>, %arg3: index) -> !stream.timepoint {
+util.func private @cmdDispatch(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<external>, %arg3: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -115,7 +115,7 @@
wo %arg5[%c0 for %c128] : !stream.resource<external>{%arg3}
}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
@@ -123,8 +123,8 @@
// CHECK: stream.cmd.func private @cmdFunc(%arg0[%arg1 for %arg2]: !stream.resource<*>, %arg3: i32, %arg4[%arg5 for %arg6]: !stream.resource<*>, %arg7: !custom.type, %arg8[%arg9 for %arg10]: !stream.resource<*>)
stream.cmd.func private @cmdFunc(%arg0[%arg1 for %arg2]: !stream.resource<*>, %arg3: i32, %arg4[%arg5 for %arg6]: !stream.resource<*>, %arg7: !custom.type, %arg8[%arg9 for %arg10]: !stream.resource<*>)
-// CHECK-LABEL: func.func @cmdCall
-func.func @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<external>, %arg3: !custom.type, %arg4: !stream.resource<external>) -> !stream.timepoint {
+// CHECK-LABEL: util.func private @cmdCall
+util.func private @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<external>, %arg3: !custom.type, %arg4: !stream.resource<external>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%size0 = arith.constant 100 : index
%size1 = arith.constant 101 : index
@@ -134,13 +134,13 @@
// CHECK: stream.cmd.call @cmdFunc(ro %[[STREAM0]][%c0 for %[[SIZE0]]], %arg1, rw %[[STREAM1]][%c0 for %[[SIZE1]]], %arg3, wo %[[STREAM2]][%c0 for %[[SIZE2]]]) : (!stream.resource<external>{%[[SIZE0]]}, i32, !stream.resource<external>{%[[SIZE1]]}, !custom.type, !stream.resource<external>{%[[SIZE2]]}) -> ()
stream.cmd.call @cmdFunc(ro %stream0[%c0 for %size0], %arg1, rw %stream1[%c0 for %size1], %arg3, wo %stream2[%c0 for %size2]) : (!stream.resource<external>{%size0}, i32, !stream.resource<external>{%size1}, !custom.type, !stream.resource<external>{%size2}) -> ()
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdExecute
-func.func @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
+util.func private @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.cmd.execute await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
@@ -161,5 +161,5 @@
}
// CHECK: } => !stream.timepoint
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/context_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/context_ops.mlir
index c324ff3..ab523ec 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/context_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/context_ops.mlir
@@ -1,12 +1,12 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @context_resolve
-func.func @context_resolve() {
+util.func private @context_resolve() {
// CHECK: = stream.context.resolve : !hal.allocator
%allocator = stream.context.resolve : !hal.allocator
// CHECK: = stream.context.resolve on(#hal.affinity.queue<*>) : !hal.device, i64
%device1, %queue_affinity_any = stream.context.resolve on(#hal.affinity.queue<*>) : !hal.device, i64
// CHECK: = stream.context.resolve on(#hal.affinity.queue<[4, 5]>) : !hal.device, i64
%device0, %queue_affinity_45 = stream.context.resolve on(#hal.affinity.queue<[4, 5]>) : !hal.device, i64
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/executable_ops.mlir
index 985e3de..c11de1d 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/executable_ops.mlir
@@ -6,15 +6,15 @@
stream.executable.export public @dispatch
// CHECK-NEXT: builtin.module
builtin.module {
- // CHECK-NEXT: func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: index) {
- func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: index) {
+ // CHECK-NEXT: util.func private @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: index) {
+ util.func private @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: index) {
%c0 = arith.constant 0 : index
// CHECK-DAG: = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:tensor<?x5x64xf32>>{%arg2}
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:tensor<?x5x64xf32>>{%arg2}
// CHECK-DAG: = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x5x4xf32>>{%arg2}
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x5x4xf32>>{%arg2}
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
}
}
@@ -32,10 +32,10 @@
}
// CHECK: builtin.module
builtin.module {
- // CHECK-NEXT: func.func @dispatch
- func.func @dispatch() {
- // CHECK: return
- return
+ // CHECK-NEXT: util.func private @dispatch
+ util.func private @dispatch() {
+ // CHECK: util.return
+ util.return
}
}
}
@@ -48,8 +48,8 @@
stream.return %arg0, %arg1 : index, index
}
builtin.module {
- func.func @dispatch() {
- return
+ util.func private @dispatch() {
+ util.return
}
}
}
@@ -62,8 +62,8 @@
stream.return %arg0, %arg1, %arg0 : index, f32, index
}
builtin.module {
- func.func @dispatch() {
- return
+ util.func private @dispatch() {
+ util.return
}
}
}
@@ -73,15 +73,15 @@
stream.executable private @executable {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding, %arg1: index) {
+ util.func private @dispatch(%arg0: !stream.binding, %arg1: index) {
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:tensor<?x5x64xf32>>{%arg1}
- return
+ util.return
}
}
}
-func.func @cmdDispatchExecutableSignatureMismatch(%arg0: !stream.resource<transient>,
+util.func private @cmdDispatchExecutableSignatureMismatch(%arg0: !stream.resource<transient>,
%arg1: index,
%arg2: !stream.resource<external>,
%arg3: index) -> !stream.timepoint {
@@ -96,5 +96,5 @@
wo %arg5[%c0 for %c128] : !stream.resource<external>{%arg3}
}
} => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/file_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/file_ops.mlir
index dbed4df..94680ce 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/file_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/file_ops.mlir
@@ -2,36 +2,36 @@
// CHECK-LABEL: @file_constant
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer)
-func.func @file_constant(%buffer: !util.buffer) {
+util.func private @file_constant(%buffer: !util.buffer) {
%c0 = arith.constant 0 : index
%c1088 = arith.constant 1088 : index
// CHECK: %file = stream.file.constant %[[BUFFER]][%c0 for %c1088] : !util.buffer{%c1088} -> !stream.file
%file = stream.file.constant %buffer[%c0 for %c1088] : !util.buffer{%c1088} -> !stream.file
- return
+ util.return
}
// -----
// CHECK-LABEL: @file_read
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[FILE:.+]]: !stream.file, %[[RESOURCE:.+]]: !stream.resource<variable>)
-func.func @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) {
+util.func private @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%c1088 = arith.constant 1088 : index
// CHECK: = stream.file.read await(%[[WAIT]]) => %[[FILE]][%c0_i64], %[[RESOURCE]][%c0], %c1088 : !stream.file -> !stream.resource<variable>{%c1088} => !stream.timepoint
%0 = stream.file.read await(%wait) => %file[%c0_i64], %resource[%c0], %c1088 : !stream.file -> !stream.resource<variable>{%c1088} => !stream.timepoint
- return
+ util.return
}
// -----
// CHECK-LABEL: @file_write
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[FILE:.+]]: !stream.file, %[[RESOURCE:.+]]: !stream.resource<variable>)
-func.func @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) {
+util.func private @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) {
%c0 = arith.constant 0 : index
%c0_i64 = arith.constant 0 : i64
%c1088 = arith.constant 1088 : index
// CHECK: = stream.file.write await(%[[WAIT]]) => %[[RESOURCE]][%c0], %[[FILE]][%c0_i64], %c1088 : !stream.resource<variable>{%c1088} -> !stream.file => !stream.timepoint
%0 = stream.file.write await(%wait) => %resource[%c0], %file[%c0_i64], %c1088 : !stream.resource<variable>{%c1088} -> !stream.file => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_folding.mlir
index cb9bf80..dfabc9a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_folding.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @FoldParameterLoadTargetSubview
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func @FoldParameterLoadTargetSubview(%wait: !stream.timepoint, %offset0: index, %length0: index, %offset1: index, %length1: index) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
+util.func private @FoldParameterLoadTargetSubview(%wait: !stream.timepoint, %offset0: index, %length0: index, %offset1: index, %length1: index) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -23,15 +23,15 @@
%subview0 = stream.resource.subview %results#0[%offset0] : !stream.resource<constant>{%c100} -> !stream.resource<constant>{%length0}
// CHECK-NOT: stream.resource.subview
%subview1 = stream.resource.subview %results#1[%offset1] : !stream.resource<constant>{%c200} -> !stream.resource<constant>{%length1}
- // CHECK: return %[[RESULTS]]#0, %[[RESULTS]]#1, %[[SIGNAL]]
- return %subview0, %subview1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ // CHECK: util.return %[[RESULTS]]#0, %[[RESULTS]]#1, %[[SIGNAL]]
+ util.return %subview0, %subview1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @FoldParameterReadTargetSubview
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[TARGET:.+]]: !stream.resource<transient>, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @FoldParameterReadTargetSubview(%wait: !stream.timepoint, %target: !stream.resource<transient>, %offset: index, %length: index) -> !stream.timepoint {
+util.func private @FoldParameterReadTargetSubview(%wait: !stream.timepoint, %target: !stream.resource<transient>, %offset: index, %length: index) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -43,14 +43,14 @@
%subview = stream.resource.subview %target[%offset] : !stream.resource<transient>{%length} -> !stream.resource<transient>{%c300}
// CHECK: = stream.parameter.read await(%[[WAIT]]) => "scope"::"key"[%[[PARAMETER_OFFSET]]] -> %[[TARGET]][%[[RESOURCE_OFFSET]] for %c200] : !stream.resource<transient>{%[[LENGTH]]} => !stream.timepoint
%timepoint = stream.parameter.read await(%wait) => "scope"::"key"[%c50_i64] -> %subview[%c100 for %c200] : !stream.resource<transient>{%c300} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @FoldParameterWriteSourceSubview
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[SOURCE:.+]]: !stream.resource<transient>, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @FoldParameterWriteSourceSubview(%wait: !stream.timepoint, %source: !stream.resource<transient>, %offset: index, %length: index) -> !stream.timepoint {
+util.func private @FoldParameterWriteSourceSubview(%wait: !stream.timepoint, %source: !stream.resource<transient>, %offset: index, %length: index) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -62,5 +62,5 @@
%subview = stream.resource.subview %source[%offset] : !stream.resource<transient>{%length} -> !stream.resource<transient>{%c300}
// CHECK: = stream.parameter.write await(%[[WAIT]]) => %[[SOURCE]][%[[RESOURCE_OFFSET]] for %c200] : !stream.resource<transient>{%[[LENGTH]]} -> "scope"::"key"[%[[PARAMETER_OFFSET]]] => !stream.timepoint
%timepoint = stream.parameter.write await(%wait) => %subview[%c100 for %c200] : !stream.resource<transient>{%c300} -> "scope"::"key"[%c50_i64] => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_ops.mlir
index eff0bb0..206103f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/parameter_ops.mlir
@@ -11,7 +11,7 @@
// CHECK-LABEL: @parameterLoad
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint)
-func.func @parameterLoad(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
+util.func private @parameterLoad(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -24,14 +24,14 @@
"scope"::"key0"[%c50_i64] : !stream.resource<constant>{%c100},
"scope"::"key1"[%c51_i64] : !stream.resource<constant>{%c200}
} => !stream.timepoint
- return %results#0, %results#1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ util.return %results#0, %results#1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterLoadNoScope
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint)
-func.func @parameterLoadNoScope(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.timepoint) {
+util.func private @parameterLoadNoScope(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.timepoint) {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
// CHECK: = stream.parameter.load await(%[[WAIT]]) => {
@@ -40,42 +40,42 @@
%result, %result_timepoint = stream.parameter.load await(%wait) => {
"key"[%c50_i64] : !stream.resource<constant>{%c100}
} => !stream.timepoint
- return %result, %result_timepoint : !stream.resource<constant>, !stream.timepoint
+ util.return %result, %result_timepoint : !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterRead
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[TARGET:.+]]: !stream.resource<transient>)
-func.func @parameterRead(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func private @parameterRead(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
%c300 = arith.constant 300 : index
// CHECK: = stream.parameter.read await(%[[WAIT]]) => "scope"::"key"[%c50_i64] -> %[[TARGET]][%c100 for %c200] : !stream.resource<transient>{%c300} => !stream.timepoint
%timepoint = stream.parameter.read await(%wait) => "scope"::"key"[%c50_i64] -> %target[%c100 for %c200] : !stream.resource<transient>{%c300} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterWrite
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[SOURCE:.+]]: !stream.resource<transient>)
-func.func @parameterWrite(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
+util.func private @parameterWrite(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
%c300 = arith.constant 300 : index
// CHECK: = stream.parameter.write await(%[[WAIT]]) => %[[SOURCE]][%c100 for %c200] : !stream.resource<transient>{%c300} -> "scope"::"key"[%c50_i64] => !stream.timepoint
%timepoint = stream.parameter.write await(%wait) => %source[%c100 for %c200] : !stream.resource<transient>{%c300} -> "scope"::"key"[%c50_i64] => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterGather
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[TARGET:.+]]: !stream.resource<transient>)
-func.func @parameterGather(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func private @parameterGather(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -96,14 +96,14 @@
"scope"::"key1"[%c51_i64] -> %target[%c101 for %c201] : !stream.resource<transient>{%c300},
"scope"::"key2"[%c52_i64] -> %target[%c102 for %c202] : !stream.resource<transient>{%c300}
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterGatherNoScope
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[TARGET:.+]]: !stream.resource<transient>)
-func.func @parameterGatherNoScope(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func private @parameterGatherNoScope(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -119,14 +119,14 @@
"key0"[%c50_i64] -> %target[%c100 for %c200] : !stream.resource<transient>{%c300},
"key1"[%c51_i64] -> %target[%c101 for %c201] : !stream.resource<transient>{%c300}
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterScatter
// CHECK-SAME: (%[[WAIT:.+]]: !stream.timepoint, %[[SOURCE:.+]]: !stream.resource<transient>)
-func.func @parameterScatter(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
+util.func private @parameterScatter(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -147,5 +147,5 @@
%source[%c101 for %c201] : !stream.resource<transient>{%c300} -> "scope"::"key1"[%c51_i64],
%source[%c102 for %c202] : !stream.resource<transient>{%c300} -> "scope"::"key2"[%c52_i64]
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_folding.mlir
index 8db5ea7..0220256 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_folding.mlir
@@ -1,20 +1,20 @@
// RUN: iree-opt --split-input-file --canonicalize %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @FoldResourceSizeOp
-func.func @FoldResourceSizeOp(%arg0: !stream.resource<staging>, %arg1: index) -> (index, i32) {
+util.func private @FoldResourceSizeOp(%arg0: !stream.resource<staging>, %arg1: index) -> (index, i32) {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.resource.size
%0 = stream.resource.size %arg0 : !stream.resource<staging>
// CHECK: %[[LOAD:.+]] = stream.resource.load
%1 = stream.resource.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> i32
- // CHECK: return %arg1, %[[LOAD]]
- return %0, %1 : index, i32
+ // CHECK: util.return %arg1, %[[LOAD]]
+ util.return %0, %1 : index, i32
}
// -----
// CHECK-LABEL: @SelectResourceSizeOp
-func.func @SelectResourceSizeOp(%arg0: !stream.resource<staging>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: i1) -> (!stream.resource<staging>, index) {
+util.func private @SelectResourceSizeOp(%arg0: !stream.resource<staging>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: i1) -> (!stream.resource<staging>, index) {
// CHECK: %[[ARG0_T:.+]] = stream.async.transfer %arg0 {{.+}} -> !stream.resource<*>{%[[ARG0_SZ:.+]]}
%0 = stream.async.transfer %arg0 : !stream.resource<staging>{%arg1} -> !stream.resource<*>{%arg1}
// CHECK: %[[ARG2_T:.+]] = stream.async.transfer %arg2 {{.+}} -> !stream.resource<*>{%[[ARG2_SZ:.+]]}
@@ -25,13 +25,13 @@
%3 = stream.resource.size %2 : !stream.resource<*>
// CHECK: = stream.async.transfer %[[RET_T]] : !stream.resource<*>{%[[RET_SIZE]]}
%4 = stream.async.transfer %2 : !stream.resource<*>{%3} -> !stream.resource<staging>{%3}
- return %4, %3 : !stream.resource<staging>, index
+ util.return %4, %3 : !stream.resource<staging>, index
}
// -----
// CHECK-LABEL: @FoldSubviewIntoLoadOp
-func.func @FoldSubviewIntoLoadOp(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
+util.func private @FoldSubviewIntoLoadOp(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -39,14 +39,14 @@
%0 = stream.resource.subview %arg0[%c128] : !stream.resource<staging>{%arg1} -> !stream.resource<staging>{%c256}
// CHECK: = stream.resource.load %arg0[%c192] : !stream.resource<staging>{%arg1} -> i32
%1 = stream.resource.load %0[%c64] : !stream.resource<staging>{%c256} -> i32
- return %1 : i32
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @DontFoldSubviewIntoLoadAcrossAwaitOp
// CHECK-SAME: (%[[SOURCE:.+]]: !stream.resource<staging>, %[[SIZE:.+]]: index, %[[FENCE:.+]]: !stream.timepoint)
-func.func @DontFoldSubviewIntoLoadAcrossAwaitOp(%source: !stream.resource<staging>, %size: index, %fence: !stream.timepoint) -> i32 {
+util.func private @DontFoldSubviewIntoLoadAcrossAwaitOp(%source: !stream.resource<staging>, %size: index, %fence: !stream.timepoint) -> i32 {
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -56,13 +56,13 @@
%1 = stream.timepoint.await %fence => %0 : !stream.resource<staging>{%c256}
// CHECK: = stream.resource.load %[[READY]][%c192] : !stream.resource<staging>{%[[SIZE]]} -> i32
%2 = stream.resource.load %1[%c64] : !stream.resource<staging>{%c256} -> i32
- return %2 : i32
+ util.return %2 : i32
}
// -----
// CHECK-LABEL: @FoldSubviewIntoStoreOp
-func.func @FoldSubviewIntoStoreOp(%arg0: !stream.resource<staging>, %arg1: index) {
+util.func private @FoldSubviewIntoStoreOp(%arg0: !stream.resource<staging>, %arg1: index) {
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -71,7 +71,7 @@
%0 = stream.resource.subview %arg0[%c128] : !stream.resource<staging>{%arg1} -> !stream.resource<staging>{%c256}
// CHECK: stream.resource.store %c123_i32, %arg0[%c192] : i32 -> !stream.resource<staging>{%arg1}
stream.resource.store %c123_i32, %0[%c64] : i32 -> !stream.resource<staging>{%c256}
- return
+ util.return
}
// -----
@@ -79,11 +79,11 @@
// A pack with no slices folds to a zero-length slab.
// CHECK-LABEL: @FoldResourcePackOpEmpty
-func.func @FoldResourcePackOpEmpty(%allocator: !hal.allocator) -> index {
+util.func private @FoldResourcePackOpEmpty(%allocator: !hal.allocator) -> index {
// CHECK-NEXT: %[[ZERO_LENGTH:.+]] = arith.constant 0
%total_length = stream.resource.pack slices({}) : index
- // CHECK-NEXT: return %[[ZERO_LENGTH]]
- return %total_length : index
+ // CHECK-NEXT: util.return %[[ZERO_LENGTH]]
+ util.return %total_length : index
}
// -----
@@ -93,7 +93,7 @@
// CHECK-LABEL: @FoldResourcePackOpOneSlice
// CHECK-SAME: %[[OFFSET:.+]]: index,
// CHECK-SAME: %[[SIZE:.+]]: index
-func.func @FoldResourcePackOpOneSlice(%offset: index, %size: index) -> (index, index) {
+util.func private @FoldResourcePackOpOneSlice(%offset: index, %size: index) -> (index, index) {
// CHECK-NOT: stream.resource.pack
%total_length, %offset_0 =
stream.resource.pack
@@ -101,8 +101,8 @@
slices({
[0, 4] = %size
}) : index
- // CHECK: return %[[SIZE]], %[[OFFSET]]
- return %total_length, %offset_0 : index, index
+ // CHECK: util.return %[[SIZE]], %[[OFFSET]]
+ util.return %total_length, %offset_0 : index, index
}
// -----
@@ -110,7 +110,7 @@
// A constant zero offset operand gets dropped.
// CHECK-LABEL: @PropagateResourcePackZeroOffset
-func.func @PropagateResourcePackZeroOffset(%size : index) -> (index, index, index) {
+util.func private @PropagateResourcePackZeroOffset(%size : index) -> (index, index, index) {
// CHECK-NOT: constant 0
// CHECK-NEXT: = stream.resource.pack slices({
%base_offset = arith.constant 0 : index
@@ -121,7 +121,7 @@
[0, 4] = %size,
[1, 2] = %size,
}) : index
- return %total_length, %offset_0, %offset_1 : index, index, index
+ util.return %total_length, %offset_0, %offset_1 : index, index, index
}
// -----
@@ -131,7 +131,7 @@
// CHECK-LABEL: @PropagateResourcePackBaseOffset
// CHECK-SAME: %[[BASE_OFFSET:.+]]: index,
// CHECK-SAME: %[[SIZE:.+]]: index
-func.func @PropagateResourcePackBaseOffset(%base_offset: index, %size : index) -> (index, index, index) {
+util.func private @PropagateResourcePackBaseOffset(%base_offset: index, %size : index) -> (index, index, index) {
// CHECK-NEXT: %[[PACKED:.+]]:3 =
%total_length, %offset_0, %offset_1 =
// CHECK-SAME: stream.resource.pack slices({
@@ -143,8 +143,8 @@
}) : index
// CHECK: %[[ADJUSTED_0:.+]] = arith.addi %[[BASE_OFFSET]], %[[PACKED]]#1
// CHECK-NEXT: %[[ADJUSTED_1:.+]] = arith.addi %[[BASE_OFFSET]], %[[PACKED]]#2
- // CHECK-NEXT: return %[[PACKED]]#0, %[[ADJUSTED_0]], %[[ADJUSTED_1]]
- return %total_length, %offset_0, %offset_1 : index, index, index
+ // CHECK-NEXT: util.return %[[PACKED]]#0, %[[ADJUSTED_0]], %[[ADJUSTED_1]]
+ util.return %total_length, %offset_0, %offset_1 : index, index, index
}
// -----
@@ -153,7 +153,7 @@
// CHECK-LABEL: @CanonicalizeResourcePackIntervals
// CHECK-SAME: %[[SIZE:.+]]: index
-func.func @CanonicalizeResourcePackIntervals(%size : index) -> (index, index, index) {
+util.func private @CanonicalizeResourcePackIntervals(%size : index) -> (index, index, index) {
// CHECK-NEXT: %[[PACKED:.+]]:3 =
%total_length, %offset_0, %offset_1 =
// CHECK-SAME: stream.resource.pack slices({
@@ -164,25 +164,25 @@
[1, 2] = %size,
[0, 4] = %size,
}) : index
- // CHECK: return %[[PACKED]]#0, %[[PACKED]]#2, %[[PACKED]]#1
- return %total_length, %offset_0, %offset_1 : index, index, index
+ // CHECK: util.return %[[PACKED]]#0, %[[PACKED]]#2, %[[PACKED]]#1
+ util.return %total_length, %offset_0, %offset_1 : index, index, index
}
// -----
// CHECK-LABEL: @FoldResourceSubviewOp
-func.func @FoldResourceSubviewOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @FoldResourceSubviewOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.resource.subview
%0 = stream.resource.subview %arg0[%c0] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%arg1}
- // CHECK: return %arg0
- return %0 : !stream.resource<*>
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @FoldResourceSubviewOps
-func.func @FoldResourceSubviewOps(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @FoldResourceSubviewOps(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%c300 = arith.constant 300 : index
%c400 = arith.constant 400 : index
@@ -191,14 +191,14 @@
%0 = stream.resource.subview %arg0[%c100] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c500}
%1 = stream.resource.subview %0[%c100] : !stream.resource<*>{%c500} -> !stream.resource<*>{%c400}
%2 = stream.resource.subview %1[%c100] : !stream.resource<*>{%c400} -> !stream.resource<*>{%c300}
- // CHECK-NEXT: return %[[RET]]
- return %2 : !stream.resource<*>
+ // CHECK-NEXT: util.return %[[RET]]
+ util.return %2 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @SinkSubviewAcrossSelectOps
-func.func @SinkSubviewAcrossSelectOps(%arg0: !stream.resource<*>, %arg1: i1) -> !stream.resource<*> {
+util.func private @SinkSubviewAcrossSelectOps(%arg0: !stream.resource<*>, %arg1: i1) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -209,8 +209,8 @@
// CHECK: %[[OFFSET:.+]] = arith.select %arg1, %c0, %c128 : index
%2 = arith.select %arg1, %0, %1 : !stream.resource<*>
// CHECK-NEXT: %[[SUBVIEW:.+]] = stream.resource.subview %arg0[%[[OFFSET]]] : !stream.resource<*>{%c256} -> !stream.resource<*>{%c128}
- // CHECK-NEXT: return %[[SUBVIEW]]
- return %2 : !stream.resource<*>
+ // CHECK-NEXT: util.return %[[SUBVIEW]]
+ util.return %2 : !stream.resource<*>
}
// -----
@@ -219,9 +219,9 @@
// CHECK-LABEL: unrealizedCastCleanup
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<transient>, %[[ARG1:.+]]: index)
-func.func @unrealizedCastCleanup(%arg0: !stream.resource<transient>, %arg1: index) -> (!stream.resource<transient>, index) {
+util.func private @unrealizedCastCleanup(%arg0: !stream.resource<transient>, %arg1: index) -> (!stream.resource<transient>, index) {
%0 = builtin.unrealized_conversion_cast %arg0, %arg1 : !stream.resource<transient>, index to !stream.resource<transient>
%1 = stream.resource.size %0 : !stream.resource<transient>
- // CHECK-NEXT: return %[[ARG0]], %[[ARG1]]
- return %0, %1 : !stream.resource<transient>, index
+ // CHECK-NEXT: util.return %[[ARG0]], %[[ARG1]]
+ util.return %0, %1 : !stream.resource<transient>, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_ops.mlir
index f19f530..6121ee3 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/resource_ops.mlir
@@ -1,79 +1,79 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @resourceAlloc
-func.func @resourceAlloc(%arg0: index) -> !stream.resource<*> {
+util.func private @resourceAlloc(%arg0: index) -> !stream.resource<*> {
// CHECK: = stream.resource.alloc uninitialized : !stream.resource<*>{%arg0}
%0 = stream.resource.alloc uninitialized : !stream.resource<*>{%arg0}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @resourceAlloca
-func.func @resourceAlloca(%arg0: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<staging>, !stream.timepoint, !stream.resource<staging>, !stream.timepoint) {
+util.func private @resourceAlloca(%arg0: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<staging>, !stream.timepoint, !stream.resource<staging>, !stream.timepoint) {
// CHECK: = stream.resource.alloca uninitialized : !stream.resource<staging>{%arg0} => !stream.timepoint
%0:2 = stream.resource.alloca uninitialized : !stream.resource<staging>{%arg0} => !stream.timepoint
// CHECK: = stream.resource.alloca uninitialized await(%arg1) => !stream.resource<staging>{%arg0} => !stream.timepoint
%1:2 = stream.resource.alloca uninitialized await(%await_timepoint) => !stream.resource<staging>{%arg0} => !stream.timepoint
- return %0#0, %0#1, %1#0, %1#1 : !stream.resource<staging>, !stream.timepoint, !stream.resource<staging>, !stream.timepoint
+ util.return %0#0, %0#1, %1#0, %1#1 : !stream.resource<staging>, !stream.timepoint, !stream.resource<staging>, !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceDealloca
-func.func @resourceDealloca(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) {
+util.func private @resourceDealloca(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) {
// CHECK: = stream.resource.dealloca %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
stream.resource.dealloca %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
// CHECK: = stream.resource.dealloca await(%arg2) => %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
stream.resource.dealloca await(%arg2) => %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
- return
+ util.return
}
// -----
// CHECK-LABEL: @resourceSize
-func.func @resourceSize(%arg0: !stream.resource<*>) -> index {
+util.func private @resourceSize(%arg0: !stream.resource<*>) -> index {
// CHECK: = stream.resource.size %arg0 : !stream.resource<*>
%0 = stream.resource.size %arg0 : !stream.resource<*>
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @resourceTryMap
-func.func @resourceTryMap(%arg0: !util.buffer) -> (i1, !stream.resource<constant>) {
+util.func private @resourceTryMap(%arg0: !util.buffer) -> (i1, !stream.resource<constant>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.resource.try_map %arg0[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c128}
%0:2 = stream.resource.try_map %arg0[%c0] : !util.buffer -> i1, !stream.resource<constant>{%c128}
- return %0#0, %0#1 : i1, !stream.resource<constant>
+ util.return %0#0, %0#1 : i1, !stream.resource<constant>
}
// -----
// CHECK-LABEL: @resourceLoad
-func.func @resourceLoad(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
+util.func private @resourceLoad(%arg0: !stream.resource<staging>, %arg1: index) -> i32 {
%c0 = arith.constant 0 : index
// CHECK: = stream.resource.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> i32
%0 = stream.resource.load %arg0[%c0] : !stream.resource<staging>{%arg1} -> i32
- return %0 : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @resourceStore
-func.func @resourceStore(%arg0: !stream.resource<staging>, %arg1: index) {
+util.func private @resourceStore(%arg0: !stream.resource<staging>, %arg1: index) {
%c0 = arith.constant 0 : index
%c123_i32 = arith.constant 123 : i32
// CHECK: stream.resource.store %c123_i32, %arg0[%c0] : i32 -> !stream.resource<staging>{%arg1}
stream.resource.store %c123_i32, %arg0[%c0] : i32 -> !stream.resource<staging>{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @resourcePack
-func.func @resourcePack(%arg0: index, %arg1: index) -> (index, index, index) {
+util.func private @resourcePack(%arg0: index, %arg1: index) -> (index, index, index) {
%c128 = arith.constant 128 : index
// CHECK: stream.resource.pack offset(%c128) slices({
// CHECK-NEXT: [0, 9] = %arg0,
@@ -83,13 +83,13 @@
[0, 9] = %arg0,
[3, 8] = %arg1,
}) : index
- return %0#0, %0#1, %0#2 : index, index, index
+ util.return %0#0, %0#1, %0#2 : index, index, index
}
// -----
// CHECK-LABEL: @resourceConstants
-func.func @resourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
+util.func private @resourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
%c4 = arith.constant 4 : index
%c8 = arith.constant 8 : index
// CHECK: = stream.resource.constants :
@@ -100,16 +100,16 @@
!stream.resource<constant>{%c4} = dense<100> : tensor<1xi32>,
!stream.resource<constant>{%c8} = dense<[101, 102]> : tensor<2xi32>
=> !stream.timepoint
- return %0#0, %0#1, %0#2 : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ util.return %0#0, %0#1, %0#2 : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceSubview
-func.func @resourceSubview(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @resourceSubview(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
// CHECK: = stream.resource.subview %arg0[%c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c256}
%0 = stream.resource.subview %arg0[%c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c256}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_folding.mlir
index 1d6aadf..08ad4a1 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_folding.mlir
@@ -1,201 +1,201 @@
// RUN: iree-opt --split-input-file --canonicalize %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @FoldTensorImportOp
-func.func @FoldTensorImportOp(%arg0: !stream.resource<external>, %arg1: index) -> !stream.resource<external> {
+util.func private @FoldTensorImportOp(%arg0: !stream.resource<external>, %arg1: index) -> !stream.resource<external> {
// CHECK-NOT: stream.tensor.import
// CHECK-NOT: stream.tensor.export
- // CHECK: return %arg0 : !stream.resource<external>
+ // CHECK: util.return %arg0 : !stream.resource<external>
%c20 = arith.constant 20 : index
%0 = stream.tensor.export %arg0 : tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20} -> !hal.buffer_view
%1 = stream.tensor.import %0 : !hal.buffer_view -> tensor<1x?x5xf32>{%arg1} in !stream.resource<external>{%c20}
- return %1 : !stream.resource<external>
+ util.return %1 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @FoldTensorExportOp
-func.func @FoldTensorExportOp(%arg0: !hal.buffer_view, %arg1: index) -> !hal.buffer_view {
+util.func private @FoldTensorExportOp(%arg0: !hal.buffer_view, %arg1: index) -> !hal.buffer_view {
// CHECK-NOT: stream.tensor.import
// CHECK-NOT: stream.tensor.export
- // CHECK: return %arg0 : !hal.buffer_view
+ // CHECK: util.return %arg0 : !hal.buffer_view
%c20 = arith.constant 20 : index
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
%1 = stream.tensor.export %0 : tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20} -> !hal.buffer_view
- return %1 : !hal.buffer_view
+ util.return %1 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @NofoldTensorExportOpBufferToView
-func.func @NofoldTensorExportOpBufferToView(%arg0: !hal.buffer, %arg1: index) -> !hal.buffer_view {
+util.func private @NofoldTensorExportOpBufferToView(%arg0: !hal.buffer, %arg1: index) -> !hal.buffer_view {
// CHECK: %[[IMPORT:.+]] = stream.tensor.import
// CHECK: %[[EXPORT:.+]] = stream.tensor.export %[[IMPORT]]
- // CHECK: return %[[EXPORT]] : !hal.buffer_view
+ // CHECK: util.return %[[EXPORT]] : !hal.buffer_view
%c20 = arith.constant 20 : index
%0 = stream.tensor.import %arg0 : !hal.buffer -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
%1 = stream.tensor.export %0 : tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20} -> !hal.buffer_view
- return %1 : !hal.buffer_view
+ util.return %1 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @KeepTensorExportOpWithDifferingEncodings
-func.func @KeepTensorExportOpWithDifferingEncodings(%arg0: !hal.buffer_view, %arg1: index) -> !hal.buffer_view {
+util.func private @KeepTensorExportOpWithDifferingEncodings(%arg0: !hal.buffer_view, %arg1: index) -> !hal.buffer_view {
// CHECK: %[[IMPORT:.+]] = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
// CHECK: %[[EXPORT:.+]] = stream.tensor.export %[[IMPORT]] : tensor<1x?x5xf32>{%arg1} in !stream.resource<external>{%c20} -> !hal.buffer_view
- // CHECK: return %[[EXPORT]] : !hal.buffer_view
+ // CHECK: util.return %[[EXPORT]] : !hal.buffer_view
%c20 = arith.constant 20 : index
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
%1 = stream.tensor.export %0 : tensor<1x?x5xf32>{%arg1} in !stream.resource<external>{%c20} -> !hal.buffer_view
- return %1 : !hal.buffer_view
+ util.return %1 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @TensorConstantToEmpty
-func.func @TensorConstantToEmpty(%arg0: index) -> !stream.resource<constant> {
+util.func private @TensorConstantToEmpty(%arg0: index) -> !stream.resource<constant> {
// CHECK: %[[EMPTY:.+]] = stream.tensor.empty : tensor<2x0x?xf32>{%arg0} in !stream.resource<constant>
- // CHECK: return %[[EMPTY]]
+ // CHECK: util.return %[[EMPTY]]
// CHECK-NOT: stream.tensor.constant
%cst = stream.tensor.constant : tensor<2x0x?xf32>{%arg0} in !stream.resource<constant> = dense<> : tensor<2x0x4xf32>
- return %cst : !stream.resource<constant>
+ util.return %cst : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @TensorConstantToEmptyDynamic
-func.func @TensorConstantToEmptyDynamic() -> !stream.resource<constant> {
+util.func private @TensorConstantToEmptyDynamic() -> !stream.resource<constant> {
// CHECK: %[[EMPTY:.+]] = stream.tensor.empty : tensor<2x?xf32>{%c0} in !stream.resource<constant>
- // CHECK: return %[[EMPTY]]
+ // CHECK: util.return %[[EMPTY]]
// CHECK-NOT: stream.tensor.constant
%c0 = arith.constant 0 : index
%cst = stream.tensor.constant : tensor<2x?xf32>{%c0} in !stream.resource<constant> = dense<> : tensor<2x0xf32>
- return %cst : !stream.resource<constant>
+ util.return %cst : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @TensorConstantToSplat
-func.func @TensorConstantToSplat() -> !stream.resource<constant> {
+util.func private @TensorConstantToSplat() -> !stream.resource<constant> {
// CHECK-DAG: %[[CST:.+]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG: %[[SIZE:.+]] = stream.tensor.sizeof tensor<2x2xf32> : index
// CHECK: = stream.tensor.splat %[[CST]] : f32 -> tensor<2x2xf32> in !stream.resource<*>{%[[SIZE]]}
%cst = stream.tensor.constant : tensor<2x2xf32> in !stream.resource<constant> = dense<1.000000e+00> : tensor<2x2xf32>
- return %cst : !stream.resource<constant>
+ util.return %cst : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @TensorComplexConstantToSplat
-func.func @TensorComplexConstantToSplat() -> !stream.resource<constant> {
+util.func private @TensorComplexConstantToSplat() -> !stream.resource<constant> {
// CHECK-DAG: %[[CST:.+]] = complex.constant [2.000000e+00 : f32, 3.000000e+00 : f32] : complex<f32>
// CHECK-DAG: %[[SIZE:.+]] = stream.tensor.sizeof tensor<2x2xcomplex<f32>> : index
// CHECK: = stream.tensor.splat %[[CST]] : complex<f32> -> tensor<2x2xcomplex<f32>> in !stream.resource<*>{%[[SIZE]]}
%cst = stream.tensor.constant : tensor<2x2xcomplex<f32>> in !stream.resource<constant> = dense<(2.000000e+00,3.000000e+00)> : tensor<2x2xcomplex<f32>>
- return %cst : !stream.resource<constant>
+ util.return %cst : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternI32ToI8
-func.func @NarrowSplatPatternI32ToI8() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternI32ToI8() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0xAAAAAAAA : i32
// CHECK: stream.tensor.splat %c-86_i8 : i8
%0 = stream.tensor.splat %pattern : i32 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternI32ToI16
-func.func @NarrowSplatPatternI32ToI16() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternI32ToI16() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0xAABBAABB : i32
// CHECK: stream.tensor.splat %c-21829_i16 : i16
%0 = stream.tensor.splat %pattern : i32 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternI64ToI8
-func.func @NarrowSplatPatternI64ToI8() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternI64ToI8() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0 : i64
// CHECK: stream.tensor.splat %c0_i8 : i8
%0 = stream.tensor.splat %pattern : i64 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternI64ToI16
-func.func @NarrowSplatPatternI64ToI16() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternI64ToI16() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0xAABBAABBAABBAABB : i64
// CHECK: stream.tensor.splat %c-21829_i16 : i16
%0 = stream.tensor.splat %pattern : i64 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternI64ToI32
-func.func @NarrowSplatPatternI64ToI32() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternI64ToI32() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0xAABBCCDDAABBCCDD : i64
// CHECK: stream.tensor.splat %c12307677_i32
%0 = stream.tensor.splat %pattern : i64 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternBF16
-func.func @NarrowSplatPatternBF16() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternBF16() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0.0 : bf16
// CHECK: stream.tensor.splat %c0_i8 : i8
%0 = stream.tensor.splat %pattern : bf16 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NarrowSplatPatternF32
-func.func @NarrowSplatPatternF32() -> !stream.resource<*> {
+util.func private @NarrowSplatPatternF32() -> !stream.resource<*> {
%c100 = arith.constant 100 : index
%pattern = arith.constant 0.0 : f32
// CHECK: stream.tensor.splat %c0_i8 : i8
%0 = stream.tensor.splat %pattern : f32 -> tensor<2x2xf32> in !stream.resource<*>{%c100}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @FoldTensorCloneOp
-func.func @FoldTensorCloneOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
+util.func private @FoldTensorCloneOp(%arg0: !stream.resource<*>, %arg1: index) -> !stream.resource<*> {
// CHECK-NOT: stream.tensor.clone
%0 = stream.tensor.clone %arg0 : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor<2x2xf32> in !stream.resource<*>{%arg1}
- // CHECK: return %arg0
- return %0 : !stream.resource<*>
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @NofoldTensorCloneOp
-func.func @NofoldTensorCloneOp(%arg0: !stream.resource<external>, %arg1: index) -> !stream.resource<*> {
+util.func private @NofoldTensorCloneOp(%arg0: !stream.resource<external>, %arg1: index) -> !stream.resource<*> {
// CHECK: %[[CLONE:.+]] = stream.tensor.clone
%0 = stream.tensor.clone %arg0 : tensor<2x2xf32> in !stream.resource<external>{%arg1} -> tensor<2x2xf32> in !stream.resource<*>{%arg1}
- // CHECK: return %[[CLONE]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[CLONE]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @ElideUnneededTensorClones
-func.func @ElideUnneededTensorClones(%arg0: !stream.resource<*>, %arg1: index) -> f32 {
+util.func private @ElideUnneededTensorClones(%arg0: !stream.resource<*>, %arg1: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK-NOT: stream.tensor.clone
%0 = stream.tensor.clone %arg0 : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor<2x2xf32> in !stream.resource<*>{%arg1}
@@ -203,6 +203,6 @@
%1 = stream.async.transfer %0 : !stream.resource<*>{%arg1} -> !stream.resource<staging>{%arg1}
// CHECK: %[[T1:.+]] = stream.tensor.load %[[T0]][%c0, %c0] : tensor<2x2xf32> in !stream.resource<staging>{%arg1} -> f32
%2 = stream.tensor.load %1[%c0, %c0] : tensor<2x2xf32> in !stream.resource<staging>{%arg1} -> f32
- // CHECK: return %[[T1]]
- return %2 : f32
+ // CHECK: util.return %[[T1]]
+ util.return %2 : f32
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
index 4cc586c..a224e4a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/tensor_ops.mlir
@@ -1,146 +1,146 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @tensorImport
-func.func @tensorImport(%arg0: !hal.buffer_view, %arg1: index) -> !stream.resource<external> {
+util.func private @tensorImport(%arg0: !hal.buffer_view, %arg1: index) -> !stream.resource<external> {
%c20 = arith.constant 20 : index
// CHECK: = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<?x5xf32>{%arg1} in !stream.resource<external>{%c20}
- return %0 : !stream.resource<external>
+ util.return %0 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @tensorExport
-func.func @tensorExport(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer_view {
+util.func private @tensorExport(%arg0: !stream.resource<external>, %arg1: index) -> !hal.buffer_view {
%c200 = arith.constant 200 : index
// CHECK: = stream.tensor.export %arg0 : tensor<?x1x10xf32>{%arg1} in !stream.resource<external>{%c200} -> !hal.buffer_view
%0 = stream.tensor.export %arg0 : tensor<?x1x10xf32>{%arg1} in !stream.resource<external>{%c200} -> !hal.buffer_view
- return %0 : !hal.buffer_view
+ util.return %0 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @tensorSizeOf
-func.func @tensorSizeOf(%arg0: index) -> index {
+util.func private @tensorSizeOf(%arg0: index) -> index {
// CHECK: = stream.tensor.sizeof tensor<?x5xf32>{%arg0} : index
%0 = stream.tensor.sizeof tensor<?x5xf32>{%arg0} : index
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @tensorEmpty
-func.func @tensorEmpty(%arg0: index, %arg1: index) -> !stream.resource<*> {
+util.func private @tensorEmpty(%arg0: index, %arg1: index) -> !stream.resource<*> {
// CHECK: = stream.tensor.empty : tensor<?x0xf32>{%arg0} in !stream.resource<*>{%arg1}
%0 = stream.tensor.empty : tensor<?x0xf32>{%arg0} in !stream.resource<*>{%arg1}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorConstant
-func.func @tensorConstant(%arg0: index) -> !stream.resource<constant> {
+util.func private @tensorConstant(%arg0: index) -> !stream.resource<constant> {
// CHECK: = stream.tensor.constant : tensor<?x5x64xf32>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
%0 = stream.tensor.constant : tensor<?x5x64xf32>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
- return %0 : !stream.resource<constant>
+ util.return %0 : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @tensorSplat
-func.func @tensorSplat(%arg0: f32, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func private @tensorSplat(%arg0: f32, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: = stream.tensor.splat %arg0 : f32 -> tensor<?x1x10xf32>{%arg1} in !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : f32 -> tensor<?x1x10xf32>{%arg1} in !stream.resource<*>{%arg2}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorClone
-func.func @tensorClone(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func private @tensorClone(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: = stream.tensor.clone %arg0 : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2}
%0 = stream.tensor.clone %arg0 : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorSlice
-func.func @tensorSlice(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) -> !stream.resource<*> {
+util.func private @tensorSlice(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: = stream.tensor.slice %arg0[%c0, %c1 for %arg3, %c1] : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x1xf32>{%arg3} in !stream.resource<*>{%arg4}
%0 = stream.tensor.slice %arg0[%c0, %c1 for %arg3, %c1] : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x1xf32>{%arg3} in !stream.resource<*>{%arg4}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorFill
-func.func @tensorFill(%arg0: f32, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
+util.func private @tensorFill(%arg0: f32, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f32 -> tensor<?x4xf32>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
%0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f32 -> tensor<?x4xf32>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorUpdate
-func.func @tensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) -> !stream.resource<*> {
+util.func private @tensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: = stream.tensor.update %arg0, %arg2[%c0, %c0] : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor<?x4xf32>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
%0 = stream.tensor.update %arg0, %arg2[%c0, %c0] : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor<?x4xf32>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @tensorLoad
-func.func @tensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index) -> f32 {
+util.func private @tensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: = stream.tensor.load %arg0[%c0] : tensor<?xf32>{%arg1} in !stream.resource<staging>{%arg2} -> f32
%0 = stream.tensor.load %arg0[%c0] : tensor<?xf32>{%arg1} in !stream.resource<staging>{%arg2} -> f32
- return %0 : f32
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @tensorLoadRank0
-func.func @tensorLoadRank0(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
+util.func private @tensorLoadRank0(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: = stream.tensor.load %arg0 : tensor<f32> in !stream.resource<staging>{%arg1} -> f32
%0 = stream.tensor.load %arg0 : tensor<f32> in !stream.resource<staging>{%arg1} -> f32
- return %0 : f32
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @tensorStore
-func.func @tensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: f32) -> !stream.resource<staging> {
+util.func private @tensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: = stream.tensor.store %arg3, %arg0[%c0] : f32 -> tensor<?xf32>{%arg1} in %arg0 as !stream.resource<staging>{%arg2}
%0 = stream.tensor.store %arg3, %arg0[%c0] : f32 -> tensor<?xf32>{%arg1} in %arg0 as !stream.resource<staging>{%arg2}
- return %0 : !stream.resource<staging>
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @tensorStoreRank0
-func.func @tensorStoreRank0(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
+util.func private @tensorStoreRank0(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: = stream.tensor.store %arg2, %arg0 : f32 -> tensor<f32> in %arg0 as !stream.resource<staging>{%arg1}
%0 = stream.tensor.store %arg2, %arg0 : f32 -> tensor<f32> in %arg0 as !stream.resource<staging>{%arg1}
- return %0 : !stream.resource<staging>
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @tensorTrace
// CHECK-SAME: (%[[TENSOR0:.+]]: !stream.resource<staging>, %[[TENSOR0_SIZE:.+]]: index, %[[TENSOR1:.+]]: !stream.resource<staging>, %[[TENSOR1_SIZE:.+]]: index, %[[TENSOR1_DIM0:.+]]: index, %[[TENSOR1_DIM2:.+]]: index)
-func.func @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index, %tensor1_dim2: index) {
+util.func private @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index, %tensor1_dim2: index) {
// CHECK: stream.tensor.trace "FOOBAR" = [
// CHECK-NEXT: %[[TENSOR0]] : tensor<5xf32> in !stream.resource<staging>{%[[TENSOR0_SIZE]]},
// CHECK-NEXT: %[[TENSOR1]] : tensor<?x3x?xi32>{%[[TENSOR1_DIM0]], %[[TENSOR1_DIM2]]} in !stream.resource<staging>{%[[TENSOR1_SIZE]]}
@@ -149,5 +149,5 @@
%tensor0 : tensor<5xf32> in !stream.resource<staging>{%tensor0_size},
%tensor1 : tensor<?x3x?xi32>{%tensor1_dim0, %tensor1_dim2} in !stream.resource<staging>{%tensor1_size}
]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_folding.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_folding.mlir
index 3fa662b..14ed05a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_folding.mlir
@@ -1,39 +1,39 @@
// RUN: iree-opt --split-input-file --canonicalize %s | FileCheck %s
// CHECK-LABEL: @FoldTimepointExport
-func.func @FoldTimepointExport(%arg0: !hal.semaphore, %arg1: index) -> (!hal.semaphore, index) {
+util.func private @FoldTimepointExport(%arg0: !hal.semaphore, %arg1: index) -> (!hal.semaphore, index) {
// CHECK-NOT: stream.timepoint.import
%0 = stream.timepoint.import %arg0, %arg1 : (!hal.semaphore, index) => !stream.timepoint
// CHECK-NOT: stream.timepoint.export
%1:2 = stream.timepoint.export %0 => (!hal.semaphore, index)
- // CHECK: return %arg0, %arg1
- return %1#0, %1#1 : !hal.semaphore, index
+ // CHECK: util.return %arg0, %arg1
+ util.return %1#0, %1#1 : !hal.semaphore, index
}
// -----
// CHECK-LABEL: @DontFoldTimepointExportMismatch
-func.func @DontFoldTimepointExportMismatch(%arg0: !hal.semaphore, %arg1: index) -> (!hal.semaphore, i32) {
+util.func private @DontFoldTimepointExportMismatch(%arg0: !hal.semaphore, %arg1: index) -> (!hal.semaphore, i32) {
// CHECK: stream.timepoint.import
%0 = stream.timepoint.import %arg0, %arg1 : (!hal.semaphore, index) => !stream.timepoint
// CHECK-NEXT: stream.timepoint.export
%1:2 = stream.timepoint.export %0 => (!hal.semaphore, i32)
- return %1#0, %1#1 : !hal.semaphore, i32
+ util.return %1#0, %1#1 : !hal.semaphore, i32
}
// -----
// CHECK-LABEL: @PassThroughChainExternal
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[ARG_FENCE:.+]]: !hal.fence)
-func.func @PassThroughChainExternal(%device: !hal.device, %arg_fence: !hal.fence) -> !hal.fence {
+util.func private @PassThroughChainExternal(%device: !hal.device, %arg_fence: !hal.fence) -> !hal.fence {
// CHECK-NOT: stream.timepoint.import
%timepoint = stream.timepoint.import %arg_fence : (!hal.fence) => !stream.timepoint
// CHECK-NOT: hal.fence.create
%chained_fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
// CHECK-NOT: stream.timepoint.chain_external
stream.timepoint.chain_external %timepoint => (%chained_fence : !hal.fence)
- // CHECK: return %[[ARG_FENCE]]
- return %chained_fence : !hal.fence
+ // CHECK: util.return %[[ARG_FENCE]]
+ util.return %chained_fence : !hal.fence
}
// -----
@@ -42,109 +42,109 @@
// CHECK-LABEL: @DontPassThroughChainExternal
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[ARG_FENCE:.+]]: !hal.fence, %[[CHAINED_FENCE:.+]]: !hal.fence)
-func.func @DontPassThroughChainExternal(%device: !hal.device, %arg_fence: !hal.fence, %chained_fence: !hal.fence) -> !hal.fence {
+util.func private @DontPassThroughChainExternal(%device: !hal.device, %arg_fence: !hal.fence, %chained_fence: !hal.fence) -> !hal.fence {
// CHECK: %[[TIMEPOINT:.+]] = stream.timepoint.import %[[ARG_FENCE]]
%timepoint = stream.timepoint.import %arg_fence : (!hal.fence) => !stream.timepoint
// CHECK: stream.timepoint.chain_external %[[TIMEPOINT]] => (%[[CHAINED_FENCE]]
stream.timepoint.chain_external %timepoint => (%chained_fence : !hal.fence)
- // CHECK: return %[[CHAINED_FENCE]]
- return %chained_fence : !hal.fence
+ // CHECK: util.return %[[CHAINED_FENCE]]
+ util.return %chained_fence : !hal.fence
}
// -----
// CHECK-LABEL: @FoldTimepointJoinOp
-func.func @FoldTimepointJoinOp(%arg0: !stream.timepoint) -> !stream.timepoint {
+util.func private @FoldTimepointJoinOp(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK-NOT: stream.timepoint.join
%0 = stream.timepoint.join max(%arg0) => !stream.timepoint
- // CHECK: return %arg0
- return %0 : !stream.timepoint
+ // CHECK: util.return %arg0
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideImmediateTimepointJoinOperands
-func.func @ElideImmediateTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
+util.func private @ElideImmediateTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
%0 = stream.timepoint.immediate => !stream.timepoint
%1 = stream.timepoint.immediate => !stream.timepoint
// CHECK: = stream.timepoint.join max(%arg0, %arg1)
%2 = stream.timepoint.join max(%arg0, %0, %1, %arg1) => !stream.timepoint
- return %2 : !stream.timepoint
+ util.return %2 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideImmediateTimepointJoinOperandsAll
-func.func @ElideImmediateTimepointJoinOperandsAll() -> !stream.timepoint {
+util.func private @ElideImmediateTimepointJoinOperandsAll() -> !stream.timepoint {
%0 = stream.timepoint.immediate => !stream.timepoint
%1 = stream.timepoint.immediate => !stream.timepoint
// CHECK-NOT: stream.timepoint.join
%2 = stream.timepoint.join max(%0, %1) => !stream.timepoint
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
- // CHECK: return %[[IMM]]
- return %2 : !stream.timepoint
+ // CHECK: util.return %[[IMM]]
+ util.return %2 : !stream.timepoint
}
// -----
// CHECK-LABEL: @FoldDuplicateTimepointJoinOperands
-func.func @FoldDuplicateTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
+util.func private @FoldDuplicateTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
// CHECK: = stream.timepoint.join max(%arg0, %arg1)
%0 = stream.timepoint.join max(%arg0, %arg1, %arg0, %arg1) => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ExpandTimepointJoinOperands
-func.func @ExpandTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint, %arg2: !stream.timepoint, %arg3: !stream.timepoint) -> !stream.timepoint {
+util.func private @ExpandTimepointJoinOperands(%arg0: !stream.timepoint, %arg1: !stream.timepoint, %arg2: !stream.timepoint, %arg3: !stream.timepoint) -> !stream.timepoint {
%join0 = stream.timepoint.join max(%arg0, %arg1) => !stream.timepoint
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%arg2, %arg0, %arg1, %arg3)
%join1 = stream.timepoint.join max(%arg2, %join0, %arg3) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join1 : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join1 : !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideImmediateBarrier
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @ElideImmediateBarrier(%size: index) -> (!stream.resource<external>, !stream.timepoint) {
+util.func private @ElideImmediateBarrier(%size: index) -> (!stream.resource<external>, !stream.timepoint) {
// CHECK-DAG: %[[RESOURCE:.+]] = stream.resource.alloc
%r0 = stream.resource.alloc uninitialized : !stream.resource<external>{%size}
// CHECK-DAG: %[[FENCE:.+]] = stream.timepoint.immediate
// CHECK-NOT: stream.timepoint.barrier
%r1, %r1t = stream.timepoint.barrier %r0 : !stream.resource<external>{%size} => !stream.timepoint
- // CHECK: return %[[RESOURCE]], %[[FENCE]]
- return %r1, %r1t : !stream.resource<external>, !stream.timepoint
+ // CHECK: util.return %[[RESOURCE]], %[[FENCE]]
+ util.return %r1, %r1t : !stream.resource<external>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ChainTimepoints
// CHECK-SAME: (%[[FENCE:.+]]: !stream.timepoint, %[[SOURCE:.+]]: !stream.resource<external>)
-func.func @ChainTimepoints(%fence: !stream.timepoint, %source: !stream.resource<external>) -> (!stream.resource<external>, !stream.timepoint) {
+util.func private @ChainTimepoints(%fence: !stream.timepoint, %source: !stream.resource<external>) -> (!stream.resource<external>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK-NOT: stream.timepoint.await
%r0 = stream.timepoint.await %fence => %source : !stream.resource<external>{%c128}
// CHECK-NOT: stream.timepoint.barrier
%r1, %r1t = stream.timepoint.barrier %r0 : !stream.resource<external>{%c128} => !stream.timepoint
- // CHECK: return %[[SOURCE]], %[[FENCE]]
- return %r1, %r1t : !stream.resource<external>, !stream.timepoint
+ // CHECK: util.return %[[SOURCE]], %[[FENCE]]
+ util.return %r1, %r1t : !stream.resource<external>, !stream.timepoint
}
// -----
// CHECK-LABEL: @ElideImmediateHostAwaits
-func.func @ElideImmediateHostAwaits(%arg0: !stream.resource<staging>) -> !stream.resource<staging> {
+util.func private @ElideImmediateHostAwaits(%arg0: !stream.resource<staging>) -> !stream.resource<staging> {
%c100 = arith.constant 100 : index
// CHECK-NOT: stream.timepoint.immediate
%0 = stream.timepoint.immediate => !stream.timepoint
// CHECK-NOT: stream.timepoint.await
%1 = stream.timepoint.await %0 => %arg0 : !stream.resource<staging>{%c100}
- // CHECK: return %arg0
- return %1 : !stream.resource<staging>
+ // CHECK: util.return %arg0
+ util.return %1 : !stream.resource<staging>
}
// -----
@@ -153,7 +153,7 @@
// use the awaited resources.
// CHECK-LABEL: @SinkAwaitToFirstConsumer
-func.func @SinkAwaitToFirstConsumer(
+util.func private @SinkAwaitToFirstConsumer(
%arg0: i1, %arg1: i1,
%arg2: !stream.resource<constant>,
%arg3: !stream.resource<staging>,
@@ -183,13 +183,13 @@
cf.br ^bb4(%2 : !stream.resource<external>)
// CHECK: ^bb4(
^bb4(%arg6: !stream.resource<external>):
- return %arg6 : !stream.resource<external>
+ util.return %arg6 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @SinkSubviewsAcrossAwaits
-func.func @SinkSubviewsAcrossAwaits(
+util.func private @SinkSubviewsAcrossAwaits(
%arg0: !stream.resource<*>, %arg1: index,
%arg2: !stream.timepoint
) -> !stream.resource<*> {
@@ -199,14 +199,14 @@
// CHECK: %[[RET:.+]] = stream.resource.subview %[[READY]][%c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c256}
%0 = stream.resource.subview %arg0[%c128] : !stream.resource<*>{%arg1} -> !stream.resource<*>{%c256}
%1 = stream.timepoint.await %arg2 => %0 : !stream.resource<*>{%c256}
- // CHECK: return %[[RET]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @GroupAwaitsByTimepoint
-func.func @GroupAwaitsByTimepoint(
+util.func private @GroupAwaitsByTimepoint(
%arg0: !stream.timepoint,
%arg1: !stream.resource<*>,
%arg2: !stream.resource<*>,
@@ -222,8 +222,8 @@
%0 = stream.timepoint.await %arg0 => %arg1 : !stream.resource<*>{%c100}
%1 = stream.timepoint.await %arg0 => %arg2 : !stream.resource<*>{%c101}
%2:2 = stream.timepoint.await %arg0 => %arg3, %arg4 : !stream.resource<*>{%c102}, !stream.resource<*>{%c103}
- // CHECK-NEXT: return %[[RET]]#0, %[[RET]]#1, %[[RET]]#2, %[[RET]]#3
- return %0, %1, %2#0, %2#1 : !stream.resource<*>, !stream.resource<*>, !stream.resource<*>, !stream.resource<*>
+ // CHECK-NEXT: util.return %[[RET]]#0, %[[RET]]#1, %[[RET]]#2, %[[RET]]#3
+ util.return %0, %1, %2#0, %2#1 : !stream.resource<*>, !stream.resource<*>, !stream.resource<*>, !stream.resource<*>
}
// -----
@@ -231,25 +231,25 @@
// Tests that the pattern doesn't kick in when it would be unsafe to group the
// awaits due to operand dependencies.
-func.func private @materializeResource0() -> !stream.resource<*>
-func.func private @materializeResource1(!stream.resource<*>) -> !stream.resource<*>
+util.func private @materializeResource0() -> !stream.resource<*>
+util.func private @materializeResource1(!stream.resource<*>) -> !stream.resource<*>
// CHECK-LABEL: @GroupAwaitsByTimepointUnsafe
-func.func @GroupAwaitsByTimepointUnsafe(
+util.func private @GroupAwaitsByTimepointUnsafe(
%arg0: !stream.timepoint
) -> (!stream.resource<*>, !stream.resource<*>) {
%c100 = arith.constant 100 : index
%c101 = arith.constant 101 : index
- // CHECK: call @materializeResource0
- %r0a = call @materializeResource0() : () -> !stream.resource<*>
+ // CHECK: util.call @materializeResource0
+ %r0a = util.call @materializeResource0() : () -> !stream.resource<*>
// CHECK-NEXT: stream.timepoint.await
%r0b = stream.timepoint.await %arg0 => %r0a : !stream.resource<*>{%c100}
- // CHECK-NEXT: call @materializeResource1
- %r1a = call @materializeResource1(%r0b) : (!stream.resource<*>) -> !stream.resource<*>
+ // CHECK-NEXT: util.call @materializeResource1
+ %r1a = util.call @materializeResource1(%r0b) : (!stream.resource<*>) -> !stream.resource<*>
// CHECK-NEXT: stream.timepoint.await
%r1b = stream.timepoint.await %arg0 => %r1a : !stream.resource<*>{%c101}
- // CHECK-NEXT: return
- return %r0b, %r1b : !stream.resource<*>, !stream.resource<*>
+ // CHECK-NEXT: util.return
+ util.return %r0b, %r1b : !stream.resource<*>, !stream.resource<*>
}
// -----
@@ -257,10 +257,10 @@
// Tests that the pattern doesn't kick in when the same timepoint are waited in
// different blocks.
-func.func private @materializeResource() -> !stream.resource<*>
+util.func private @materializeResource() -> !stream.resource<*>
// CHECK-LABEL: @DontGroupAwaitsByTimepointAcrossBlocks
-func.func @DontGroupAwaitsByTimepointAcrossBlocks(
+util.func private @DontGroupAwaitsByTimepointAcrossBlocks(
%arg0: !stream.timepoint,
%arg1: !stream.resource<*>,
%arg2: i1
@@ -273,20 +273,20 @@
// CHECK: ^bb
^bb0:
// CHECK: stream.timepoint.await %arg0 => %arg1
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
// CHECK: ^bb
^bb1:
- // CHECK: %[[R:.+]] = call @materializeResource
- %r = call @materializeResource() : () -> !stream.resource<*>
+ // CHECK: %[[R:.+]] = util.call @materializeResource
+ %r = util.call @materializeResource() : () -> !stream.resource<*>
// CHECK: stream.timepoint.await %arg0 => %[[R]]
%1 = stream.timepoint.await %arg0 => %r : !stream.resource<*>{%c101}
- return %1 : !stream.resource<*>
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @FoldDuplicateAwaitResources
-func.func @FoldDuplicateAwaitResources(
+util.func private @FoldDuplicateAwaitResources(
%arg0: !stream.timepoint,
%arg1: !stream.resource<staging>, %arg2: !stream.resource<*>
) -> (!stream.resource<staging>, !stream.resource<*>, !stream.resource<staging>, !stream.resource<staging>) {
@@ -294,14 +294,14 @@
%c200 = arith.constant 200 : index
// CHECK: %[[RET:.+]]:2 = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
%0:4 = stream.timepoint.await %arg0 => %arg1, %arg2, %arg1, %arg1 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}, !stream.resource<staging>{%c100}, !stream.resource<staging>{%c100}
- // CHECK: return %[[RET]]#0, %[[RET]]#1, %[[RET]]#0, %[[RET]]#0
- return %0#0, %0#1, %0#2, %0#3 : !stream.resource<staging>, !stream.resource<*>, !stream.resource<staging>, !stream.resource<staging>
+ // CHECK: util.return %[[RET]]#0, %[[RET]]#1, %[[RET]]#0, %[[RET]]#0
+ util.return %0#0, %0#1, %0#2, %0#3 : !stream.resource<staging>, !stream.resource<*>, !stream.resource<staging>, !stream.resource<staging>
}
// -----
// CHECK-LABEL: @ElideUnusedTimepointAwaitOp
-func.func @ElideUnusedTimepointAwaitOp(
+util.func private @ElideUnusedTimepointAwaitOp(
%arg0: !stream.timepoint,
%arg1: !stream.resource<staging>, %arg2: !stream.resource<*>
) {
@@ -309,5 +309,5 @@
%c200 = arith.constant 200 : index
// CHECK-NOT: stream.timepoint.await
%0:2 = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_ops.mlir
index 15c6d6b..ceb0994 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/test/timepoint_ops.mlir
@@ -1,55 +1,55 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @timepointImmediate
-func.func @timepointImmediate() -> !stream.timepoint {
+util.func private @timepointImmediate() -> !stream.timepoint {
// CHECK: = stream.timepoint.immediate => !stream.timepoint
%0 = stream.timepoint.immediate => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointImport
-func.func @timepointImport(%arg0: !hal.semaphore, %arg1: index) -> !stream.timepoint {
+util.func private @timepointImport(%arg0: !hal.semaphore, %arg1: index) -> !stream.timepoint {
// CHECK: = stream.timepoint.import %arg0, %arg1 : (!hal.semaphore, index) => !stream.timepoint
%0 = stream.timepoint.import %arg0, %arg1 : (!hal.semaphore, index) => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointExport
-func.func @timepointExport(%arg0: !stream.timepoint) -> (!hal.semaphore, index) {
+util.func private @timepointExport(%arg0: !stream.timepoint) -> (!hal.semaphore, index) {
// CHECK: = stream.timepoint.export %arg0 => (!hal.semaphore, index)
%0:2 = stream.timepoint.export %arg0 => (!hal.semaphore, index)
- return %0#0, %0#1 : !hal.semaphore, index
+ util.return %0#0, %0#1 : !hal.semaphore, index
}
// -----
// CHECK-LABEL: @timepointChainExternal
-func.func @timepointChainExternal(%arg0: !stream.timepoint, %arg1: !hal.fence) {
+util.func private @timepointChainExternal(%arg0: !stream.timepoint, %arg1: !hal.fence) {
// CHECK: stream.timepoint.chain_external %arg0 => (%arg1 : !hal.fence)
stream.timepoint.chain_external %arg0 => (%arg1 : !hal.fence)
- return
+ util.return
}
// -----
// CHECK-LABEL: @timepointJoin
-func.func @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
+util.func private @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
// CHECK: = stream.timepoint.join max(%arg0, %arg1) => !stream.timepoint
%0 = stream.timepoint.join max(%arg0, %arg1) => !stream.timepoint
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointAwait
-func.func @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
+util.func private @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
%0:2 = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
- return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
+ util.return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
index 12c9f6d..7138212 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ElideTimepoints.cpp
@@ -21,7 +21,6 @@
#include "mlir/Analysis/Liveness.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/Attributes.h"
@@ -870,7 +869,7 @@
})
.Case<cf::BranchOp, cf::CondBranchOp>(
[&](Operation *op) { elideTimepointOperands(op); })
- .Case<func::ReturnOp, scf::YieldOp>(
+ .Case<IREE::Util::ReturnOp, scf::YieldOp>(
[&](Operation *op) { elideTimepointOperands(op); });
});
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.td b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.td
index fabe47f..83f1d0f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/Passes.td
@@ -262,7 +262,6 @@
}];
let dependentDialects = [
"mlir::cf::ControlFlowDialect",
- "mlir::func::FuncDialect",
"IREE::Stream::StreamDialect",
"IREE::Util::UtilDialect",
];
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
index 54b949d..1e0a667 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/PropagateTimepoints.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
@@ -57,7 +56,8 @@
// as duplicates will get added and we'll need to rely on global fusion to
// get rid of them. Note that this only expands globals and does not yet update
// use sites - we just need the ops to reference while doing so.
-static ExpandedGlobalMap expandResourceGlobals(Operation *rootOp) {
+static ExpandedGlobalMap expandResourceGlobals(Operation *rootOp,
+ SymbolTable &symbolTable) {
ExpandedGlobalMap expandedGlobals;
// Gather all of the resource globals in the root.
@@ -70,7 +70,6 @@
}
// Expand each global by adding the timepoint right next to it.
- SymbolTable symbolTable(rootOp);
auto timepointType = IREE::Stream::TimepointType::get(rootOp->getContext());
auto immediateAttr =
IREE::Stream::TimepointAttr::get(rootOp->getContext(), timepointType);
@@ -103,20 +102,22 @@
llvm::any_of(op->getResultTypes(), isResourceType);
}
+static void expandType(Type type, SmallVectorImpl<Type> &newTypes) {
+ newTypes.push_back(type);
+ if (isResourceType(type)) {
+ newTypes.push_back(IREE::Stream::TimepointType::get(type.getContext()));
+ }
+}
+
// Expands resources in the given |types| list to (timepoint, resource).
// This could be changed to some iterator magic to avoid the alloc.
static SmallVector<Type> expandTypes(TypeRange types) {
if (types.empty())
return {};
- auto timepointType =
- IREE::Stream::TimepointType::get(types.front().getContext());
SmallVector<Type> newTypes;
newTypes.reserve(types.size() * 2);
for (auto type : types) {
- if (isResourceType(type)) {
- newTypes.push_back(timepointType);
- }
- newTypes.push_back(type);
+ expandType(type, newTypes);
}
return newTypes;
}
@@ -150,6 +151,19 @@
}
}
+static void expandOperand(Location loc, Value operand,
+ SmallVectorImpl<Value> &newOperands,
+ IRMapping &resourceTimepointMap, OpBuilder &builder) {
+ if (isResourceType(operand.getType())) {
+ auto [timepoint, resource] =
+ consumeTimepoint(loc, operand, resourceTimepointMap, builder);
+ newOperands.push_back(resource);
+ newOperands.push_back(timepoint);
+ } else {
+ newOperands.push_back(operand);
+ }
+}
+
// Expands resources in |operands| into (timepoint, resource) pairs.
static SmallVector<Value> expandOperands(Location loc, ValueRange operands,
IRMapping &resourceTimepointMap,
@@ -157,19 +171,13 @@
SmallVector<Value> result;
result.reserve(operands.size() * 2);
for (auto operand : operands) {
- if (isResourceType(operand.getType())) {
- auto timepointOperand =
- consumeTimepoint(loc, operand, resourceTimepointMap, builder);
- result.push_back(timepointOperand.first);
- result.push_back(timepointOperand.second);
- } else {
- result.push_back(operand);
- }
+ expandOperand(loc, operand, result, resourceTimepointMap, builder);
}
return result;
}
-static void expandTimepoints(Operation *op, ExpandedGlobalMap &globalMap,
+static void expandTimepoints(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap,
IRMapping &resourceTimepointMap);
// Finds the size of a block argument resource or materializes a size if needed.
@@ -222,7 +230,7 @@
// Recursively expands resources into (timepoint, resource) pairs within the
// given |region|. All branches, ops, and nested regions will be processed.
static void expandRegion(Region ®ion, bool canModifyEntryBlock,
- ExpandedGlobalMap &globalMap,
+ SymbolTable &symbolTable, ExpandedGlobalMap &globalMap,
IRMapping resourceTimepointMap) {
if (region.empty())
return;
@@ -242,20 +250,18 @@
if (!isResourceType(resourceArg.getType()))
continue;
auto timepointArg =
- block.insertArgument(i, timepointType, resourceArg.getLoc());
+ block.insertArgument(i + 1, timepointType, resourceArg.getLoc());
expansions.push_back(std::make_pair(timepointArg, resourceArg));
resourceTimepointMap.map(resourceArg, timepointArg);
}
// Insert awaits that we've sunk from callers.
auto builder = OpBuilder::atBlockBegin(&block);
- for (auto expansion : llvm::reverse(expansions)) {
+ for (auto [timepoint, resource] : llvm::reverse(expansions)) {
// If we can look down the chain and see the size then we can use that.
// If it's a constant we can't use it as it may be defined anywhere in the
// region. Dynamic dimensions usually come from outside or entry arguments
// though and those are available.
- auto timepoint = expansion.first;
- auto resource = expansion.second;
auto resourceSize =
makeBlockArgResourceSize(region.getLoc(), resource, builder);
auto awaitOp = builder.create<IREE::Stream::TimepointAwaitOp>(
@@ -276,14 +282,14 @@
if (region.hasOneBlock()) {
for (auto &op :
llvm::make_early_inc_range(region.front().getOperations())) {
- expandTimepoints(&op, globalMap, resourceTimepointMap);
+ expandTimepoints(&op, symbolTable, globalMap, resourceTimepointMap);
}
} else {
DominanceInfo domInfo(region.getParentOp());
for (auto *blockInfo : llvm::breadth_first(domInfo.getRootNode(®ion))) {
auto *block = blockInfo->getBlock();
for (auto &op : llvm::make_early_inc_range(block->getOperations())) {
- expandTimepoints(&op, globalMap, resourceTimepointMap);
+ expandTimepoints(&op, symbolTable, globalMap, resourceTimepointMap);
}
}
}
@@ -357,23 +363,11 @@
}
static void expandInitializerOp(IREE::Util::InitializerOp op,
+ SymbolTable &symbolTable,
ExpandedGlobalMap &globalMap,
IRMapping &resourceTimepointMap) {
- expandRegion(op.getRegion(), /*canModifyEntryBlock=*/false, globalMap,
- resourceTimepointMap);
-}
-
-// Returns true if |op| is either public and visible to external modules or
-// external and resolved later on. We can't modify their signatures.
-static bool isPublicOrExternal(CallableOpInterface callableOp) {
- if (auto symbolOp = dyn_cast<SymbolOpInterface>(callableOp.getOperation())) {
- if (symbolOp.isPublic())
- return true;
- }
- auto *region = callableOp.getCallableRegion();
- if (!region || region->empty())
- return true;
- return false;
+ expandRegion(op.getRegion(), /*canModifyEntryBlock=*/false, symbolTable,
+ globalMap, resourceTimepointMap);
}
// Inserts awaits on resource arguments.
@@ -384,24 +378,25 @@
// don't need a wait.
//
// Example:
-// func.func @foo(%0: !stream.resource)
+// util.func @foo(%0: !stream.resource)
// ->
-// func.func @foo(%t: !stream.timepoint, %0: !stream.resource) {
+// util.func @foo(%t: !stream.timepoint, %0: !stream.resource) {
// %1 = stream.timepoint.await %t, %0
-static void expandFuncOp(mlir::func::FuncOp op, ExpandedGlobalMap &globalMap,
+static void expandFuncOp(IREE::Util::FuncOp op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap,
IRMapping &resourceTimepointMap) {
// Ignore public/external function signatures but still convert regions.
- bool canModifyEntryBlock = !isPublicOrExternal(op);
+ bool canModifyEntryBlock = !IREE::Util::isPublicOrExternal(op);
if (canModifyEntryBlock) {
- auto oldType = op.getFunctionType();
- auto inputTypes = expandTypes(oldType.getInputs());
- auto resultTypes = expandTypes(oldType.getResults());
- auto newType = FunctionType::get(op.getContext(), inputTypes, resultTypes);
- if (newType != oldType) {
- op.setType(newType);
- }
+ op.expandSignature(
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ });
}
- expandRegion(op.getRegion(), canModifyEntryBlock, globalMap,
+ expandRegion(op.getRegion(), canModifyEntryBlock, symbolTable, globalMap,
resourceTimepointMap);
}
@@ -414,28 +409,31 @@
//
// Example:
// %1 = stream.timepoint.await %t, %0
-// %r = call @foo(%1)
+// %r = util.call @foo(%1)
// ->
-// %rt, %r = call @foo(%t, %0)
+// %rt, %r = util.call @foo(%t, %0)
// stream.timepoint.await %rt, %t
-static void expandCallOp(mlir::func::CallOp op,
+static void expandCallOp(IREE::Util::CallOp op, SymbolTable &symbolTable,
IRMapping &resourceTimepointMap) {
if (!usesResources(op))
return;
// Ignore calls to public/external functions.
- auto calleeOp = SymbolTable::lookupNearestSymbolFrom<CallableOpInterface>(
- op, op.getCalleeAttr());
- if (isPublicOrExternal(calleeOp))
+ auto calleeOp = symbolTable.lookup<CallableOpInterface>(op.getCallee());
+ if (IREE::Util::isPublicOrExternal(calleeOp))
return;
// Build the new call op with expanded operands and results.
OpBuilder builder(op);
- auto operands = expandOperands(op.getLoc(), op.getOperands(),
- resourceTimepointMap, builder);
- auto resultTypes = expandTypes(op.getResultTypes());
- auto newOp = builder.create<mlir::func::CallOp>(op.getLoc(), op.getCallee(),
- resultTypes, operands);
+ auto newOp = op.cloneAndExpand(
+ [&](unsigned i, Value operand, SmallVectorImpl<Value> &newOperands) {
+ expandOperand(op.getLoc(), operand, newOperands, resourceTimepointMap,
+ builder);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ builder);
// Insert awaits on results that we are sinking across the call edge.
// The hope is that by moving the awaits here we can fold with uses inside
@@ -449,8 +447,8 @@
oldResult.replaceAllUsesWith(newResult);
continue;
}
- auto newTimepoint = newOp.getResult(newIdx++);
auto newResult = newOp.getResult(newIdx++);
+ auto newTimepoint = newOp.getResult(newIdx++);
resourceTimepointMap.map(newResult, newTimepoint);
auto newResultSize =
builder.create<IREE::Stream::ResourceSizeOp>(op.getLoc(), newResult)
@@ -469,19 +467,19 @@
//
// Example:
// %1 = stream.timepoint.await %t, %0
-// return %1
+// util.return %1
// ->
-// return %t, %0
-static void expandReturnOp(mlir::func::ReturnOp op,
+// util.return %t, %0
+static void expandReturnOp(IREE::Util::ReturnOp op,
IRMapping &resourceTimepointMap) {
if (!usesResources(op))
return;
- if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>()))
+ if (IREE::Util::isPublicOrExternal(op->getParentOfType<IREE::Util::FuncOp>()))
return;
OpBuilder builder(op);
auto operands = expandOperands(op.getLoc(), op.getOperands(),
resourceTimepointMap, builder);
- builder.create<mlir::func::ReturnOp>(op.getLoc(), operands);
+ builder.create<IREE::Util::ReturnOp>(op.getLoc(), operands);
op.erase();
}
@@ -491,11 +489,11 @@
// Example:
// %1 = stream.timepoint.await %t, %0
// br ^bb1(%1)
-// ^bb1(%b):
+// ^bb1(%bb_1):
// ->
-// br ^bb1(%t, %0)
-// ^bb1(%a, %b):
-// %1 = stream.timepoint.await %a, %b
+// br ^bb1(%0, %t)
+// ^bb1(%bb_0, %bb_t):
+// %1 = stream.timepoint.await %bb_t, %bb_0
static void expandBranchOp(mlir::cf::BranchOp op,
IRMapping &resourceTimepointMap) {
if (!usesResources(op))
@@ -598,19 +596,21 @@
// Recursively expands resources into (timepoint, resource) in |op|.
// Resource timepoint chains are established when possible by looking through
// awaits.
-static void expandTimepoints(Operation *op, ExpandedGlobalMap &globalMap,
+static void expandTimepoints(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap,
IRMapping &resourceTimepointMap) {
if (auto loadOp = dyn_cast<IREE::Util::GlobalLoadOpInterface>(op)) {
expandGlobalLoadOp(loadOp, globalMap, resourceTimepointMap);
} else if (auto storeOp = dyn_cast<IREE::Util::GlobalStoreOpInterface>(op)) {
expandGlobalStoreOp(storeOp, globalMap, resourceTimepointMap);
} else if (auto initializerOp = dyn_cast<IREE::Util::InitializerOp>(op)) {
- expandInitializerOp(initializerOp, globalMap, resourceTimepointMap);
- } else if (auto funcOp = dyn_cast<mlir::func::FuncOp>(op)) {
- expandFuncOp(funcOp, globalMap, resourceTimepointMap);
- } else if (auto callOp = dyn_cast<mlir::func::CallOp>(op)) {
- expandCallOp(callOp, resourceTimepointMap);
- } else if (auto returnOp = dyn_cast<mlir::func::ReturnOp>(op)) {
+ expandInitializerOp(initializerOp, symbolTable, globalMap,
+ resourceTimepointMap);
+ } else if (auto funcOp = dyn_cast<IREE::Util::FuncOp>(op)) {
+ expandFuncOp(funcOp, symbolTable, globalMap, resourceTimepointMap);
+ } else if (auto callOp = dyn_cast<IREE::Util::CallOp>(op)) {
+ expandCallOp(callOp, symbolTable, resourceTimepointMap);
+ } else if (auto returnOp = dyn_cast<IREE::Util::ReturnOp>(op)) {
expandReturnOp(returnOp, resourceTimepointMap);
} else if (auto branchOp = dyn_cast<mlir::cf::BranchOp>(op)) {
expandBranchOp(branchOp, resourceTimepointMap);
@@ -644,9 +644,10 @@
PropagateTimepointsPass> {
void runOnOperation() override {
auto rootOp = getOperation();
+ SymbolTable symbolTable(rootOp);
// Expand all util.global ops holding resources into (timepoint, resource).
- auto globalMap = expandResourceGlobals(rootOp);
+ auto globalMap = expandResourceGlobals(rootOp, symbolTable);
// Walk the entire IR tree and expand the globals.
// We could do this via pattern application but that gets much trickier to
@@ -654,7 +655,8 @@
// expanding multiple times.
for (auto callableOp : rootOp.getOps<mlir::CallableOpInterface>()) {
IRMapping resourceTimepointMap;
- expandTimepoints(callableOp, globalMap, resourceTimepointMap);
+ expandTimepoints(callableOp, symbolTable, globalMap,
+ resourceTimepointMap);
}
}
};
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
index abb4cce..bc73616 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -224,9 +223,9 @@
// Applies usage analysis results to an MLIR function.
// All resource arguments and results, block arguments, and nested operations
// will have their lifetime specified.
-struct ApplyFuncOp : public UsageRefinementPattern<mlir::func::FuncOp> {
- using UsageRefinementPattern<mlir::func::FuncOp>::UsageRefinementPattern;
- LogicalResult matchAndRewrite(mlir::func::FuncOp op,
+struct ApplyFuncOp : public UsageRefinementPattern<IREE::Util::FuncOp> {
+ using UsageRefinementPattern<IREE::Util::FuncOp>::UsageRefinementPattern;
+ LogicalResult matchAndRewrite(IREE::Util::FuncOp op,
PatternRewriter &rewriter) const override {
if (op.isExternal()) {
return rewriter.notifyMatchFailure(op, "external funcs not supported");
@@ -255,7 +254,7 @@
// Results:
SmallVector<Type> newOutputs;
- auto anyReturnOp = *op.getOps<mlir::func::ReturnOp>().begin();
+ auto anyReturnOp = *op.getOps<IREE::Util::ReturnOp>().begin();
for (auto outputType : llvm::enumerate(op.getFunctionType().getResults())) {
auto oldType =
llvm::dyn_cast<IREE::Stream::ResourceType>(outputType.value());
@@ -410,7 +409,7 @@
ApplyScfWhileOp>(context, analysis);
patterns.insert<ApplyGenericOp<IREE::Util::OptimizationBarrierOp>,
ApplyGenericOp<mlir::arith::SelectOp>,
- ApplyGenericOp<mlir::func::CallOp>,
+ ApplyGenericOp<IREE::Util::CallOp>,
ApplyGenericOp<mlir::scf::ConditionOp>,
ApplyGenericOp<mlir::scf::YieldOp>,
ApplyGenericOp<IREE::Stream::TimepointBarrierOp>>(context,
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_dispatch_arguments.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_dispatch_arguments.mlir
index feb6913..69a16bc 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_dispatch_arguments.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_dispatch_arguments.mlir
@@ -7,7 +7,7 @@
// CHECK: stream.executable.export public @dispatch
stream.executable.export public @dispatch
}
-func.func @skipExternExecutables(%arg0: i32) {
+util.func public @skipExternExecutables(%arg0: i32) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c0_i32 = arith.constant 0 : i32
@@ -17,7 +17,7 @@
rw %capture[%c0 for %c1] : !stream.resource<transient>{%c1}
}
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -32,17 +32,17 @@
stream.executable private @annotatePotentialValuesEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(
+ // CHECK: util.func public @dispatch(
// CHECK-SAME: %arg0: i32,
// CHECK-SAME: %arg1: index {stream.alignment = 4 : index, stream.values = [20 : index, 40 : index]},
// CHECK-SAME: %arg2: i1 {stream.values = [false, true]},
// CHECK-SAME: %arg3: f32
- func.func @dispatch(%arg0: i32, %arg1: index, %arg2: i1, %arg3: f32, %binding: !stream.binding) {
- return
+ util.func public @dispatch(%arg0: i32, %arg1: index, %arg2: i1, %arg3: f32, %binding: !stream.binding) {
+ util.return
}
}
}
-func.func @annotatePotentialValues(%arg0: i32) {
+util.func public @annotatePotentialValues(%arg0: i32) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c0_i32 = arith.constant 0 : i32
@@ -61,7 +61,7 @@
rw %capture[%c0 for %c1] : !stream.resource<transient>{%c1}
}
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -77,24 +77,24 @@
stream.executable private @annotateOperandAlignmentEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(
+ // CHECK: util.func public @dispatch(
// CHECK-SAME: %arg0: index,
// CHECK-SAME: %arg1: index {stream.alignment = 16 : index},
// CHECK-SAME: %arg2: index {stream.values = [4096 : index, 4097 : index]},
// CHECK-SAME: %arg3: index {stream.alignment = 16 : index, stream.values = [1200 : index, 5232 : index]}
// CHECK-SAME: %arg4: index {stream.alignment = 1024 : index, stream.values = [1024 : index, 2048 : index]}
- func.func @dispatch(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %binding: !stream.binding) {
- return
+ util.func public @dispatch(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %binding: !stream.binding) {
+ util.return
}
}
}
util.global private mutable @global_var = 1024 : index
-func.func @otherFunc() {
+util.func public @otherFunc() {
%c2048 = arith.constant 2048 : index
util.global.store %c2048, @global_var : index
- return
+ util.return
}
-func.func @annotateOperandAlignment(%arg0: index, %arg1: index) {
+util.func public @annotateOperandAlignment(%arg0: index, %arg1: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c16 = arith.constant 16 : index
@@ -114,7 +114,7 @@
rw %capture[%c0 for %c1] : !stream.resource<transient>{%c1}
}
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -129,17 +129,17 @@
stream.executable private @annotateBindingAlignmentEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(
+ // CHECK: util.func public @dispatch(
// CHECK-SAME: %arg0: !stream.binding {stream.alignment = 64 : index},
// CHECK-SAME: %arg1: !stream.binding,
// CHECK-SAME: %arg2: !stream.binding {stream.alignment = 8 : index},
// CHECK-SAME: %arg3: !stream.binding {stream.alignment = 16 : index})
- func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
- return
+ util.func public @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: !stream.binding) {
+ util.return
}
}
}
-func.func @annotateBindingAlignment(%arg0: index, %arg1: index) {
+util.func public @annotateBindingAlignment(%arg0: index, %arg1: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c8 = arith.constant 8 : index
@@ -163,5 +163,5 @@
rw %capture[%aligned1 for %c8] : !stream.resource<transient>{%c64}
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
index a353ac6..fd68d30 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
@@ -5,8 +5,8 @@
// CHECK: stream.executable.export public @dispatch
flow.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %[[ARG0_DIM0:.+]]: index, %[[ARG1_DIM1:.+]]: index)
- func.func @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<?x4xf32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>,
+ // CHECK: util.func public @dispatch(%arg0: !stream.binding, %arg1: !stream.binding, %[[ARG0_DIM0:.+]]: index, %[[ARG1_DIM1:.+]]: index)
+ util.func public @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<?x4xf32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>,
%arg0_dim0: index, %arg1_dim1: index) {
// CHECK: %[[ARG0_TENSOR:.+]] = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<?x4xf32>>{%[[ARG0_DIM0]]}
%arg0_tied = flow.dispatch.tie_shape %arg0 : !flow.dispatch.tensor<readonly:tensor<?x4xf32>>{%arg0_dim0}
@@ -18,13 +18,13 @@
// CHECK: flow.dispatch.tensor.store %[[TILE]], %[[ARG1_TENSOR]], offsets = [0, 0], sizes = [%[[ARG0_DIM0]], 4], strides = [1, 1] : tensor<?x4xf32> -> !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>{%[[ARG1_DIM1]]}
flow.dispatch.tensor.store %0, %arg1_tied, offsets = [0, 0], sizes = [%arg0_dim0, 4], strides = [1, 1] : tensor<?x4xf32> -> !flow.dispatch.tensor<writeonly:tensor<4x?xf32>>{%arg1_dim1}
- return
+ util.return
}
}
}
// CHECK-LABEL: @simple_mul
-func.func @simple_mul(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
+util.func public @simple_mul(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
// CHECK-DAG: %[[DIM0:.+]] = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%dim0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
// CHECK-DAG: %[[ELEMENT_TYPE:.+]] = hal.element_type<f32>
@@ -44,8 +44,8 @@
// CHECK: %[[RET0_T:.+]] = stream.async.transfer %[[RET0]] : !stream.resource<*>{%[[RET0_SIZE]]} -> !stream.resource<external>{%[[RET0_SIZE]]}
// CHECK: %[[RET0_EXPORT:.+]] = stream.tensor.export %[[RET0_T]] : tensor<?xf32>{%[[DIM0]]} in !stream.resource<external>{%[[RET0_SIZE]]} -> !hal.buffer_view
%2 = hal.tensor.export %1 : tensor<?xf32>{%dim0} -> !hal.buffer_view
- // CHECK: return %[[RET0_EXPORT]] : !hal.buffer_view
- return %2 : !hal.buffer_view
+ // CHECK: util.return %[[RET0_EXPORT]] : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// -----
@@ -54,7 +54,7 @@
// CHECK-LABEL: @custom_ops
// CHECK-SAME: (%[[ARG:.+]]: !stream.resource<*>, %[[ARG_SIZE:.+]]: index) -> (!stream.resource<*>, index)
-func.func @custom_ops(%arg0: tensor<4x8xf32>) -> tensor<8x4xf32> {
+util.func public @custom_ops(%arg0: tensor<4x8xf32>) -> tensor<8x4xf32> {
// CHECK: %[[ARG_EXTERNAL:.+]] = stream.async.transfer %[[ARG]]
// CHECK: %[[ARG_TENSOR:.+]] = stream.tensor.export %[[ARG_EXTERNAL]]
// CHECK: %[[RET_TENSOR:.+]] = "some.op"(%[[ARG_TENSOR]]) : (tensor<4x8xf32>) -> tensor<8x4xf32>
@@ -62,8 +62,8 @@
// CHECK: %[[RET_SIZE:.+]] = stream.tensor.sizeof tensor<8x4xf32>
// CHECK: %[[RET_EXTERNAL:.+]] = stream.tensor.import %[[RET_TENSOR]]
// CHECK: %[[RET:.+]] = stream.async.transfer %[[RET_EXTERNAL]]
- // CHECK: return %[[RET]], %[[RET_SIZE]] : !stream.resource<*>, index
- return %0 : tensor<8x4xf32>
+ // CHECK: util.return %[[RET]], %[[RET_SIZE]] : !stream.resource<*>, index
+ util.return %0 : tensor<8x4xf32>
}
// -----
@@ -79,8 +79,8 @@
flow.executable.export public @dispatch
// CHECK: builtin.module
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING0:.+]]: !stream.binding, %[[BINDING1:.+]]: !stream.binding)
- func.func @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<i32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<i1>>) {
+ // CHECK: util.func public @dispatch(%[[BINDING0:.+]]: !stream.binding, %[[BINDING1:.+]]: !stream.binding)
+ util.func public @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<i32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<i1>>) {
%c3_i32 = arith.constant 3 : i32
// CHECK: %[[ARG0:.+]] = stream.binding.subspan %[[BINDING0]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<i32>>
// CHECK: %[[ARG1:.+]] = stream.binding.subspan %[[BINDING1]][%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<i1>>
@@ -95,7 +95,7 @@
} -> tensor<i1>
// CHECK: flow.dispatch.tensor.store %{{.+}}, %[[ARG1]], offsets = [], sizes = [], strides = [] : tensor<i1> -> !flow.dispatch.tensor<writeonly:tensor<i1>>
flow.dispatch.tensor.store %2, %arg1, offsets = [], sizes = [], strides = [] : tensor<i1> -> !flow.dispatch.tensor<writeonly:tensor<i1>>
- return
+ util.return
}
}
}
@@ -104,7 +104,7 @@
flow.executable private @while_test_dispatch_1 {
flow.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<i32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<i32>>) {
+ util.func public @dispatch(%arg0: !flow.dispatch.tensor<readonly:tensor<i32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<i32>>) {
%c2_i32 = arith.constant 2 : i32
%0 = flow.dispatch.tensor.load %arg0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i32>> -> tensor<i32>
%1 = tensor.empty() : tensor<i32>
@@ -114,13 +114,13 @@
linalg.yield %3 : i32
} -> tensor<i32>
flow.dispatch.tensor.store %2, %arg1, offsets = [], sizes = [], strides = [] : tensor<i32> -> !flow.dispatch.tensor<writeonly:tensor<i32>>
- return
+ util.return
}
}
}
-// CHECK-LABEL: func.func @while_test
-func.func @while_test() {
+// CHECK-LABEL: util.func public @while_test
+util.func public @while_test() {
%c1 = arith.constant 1 : index
// CHECK: %[[CONSTANT:.+]] = stream.tensor.constant : tensor<i32> in !stream.resource<constant> = dense<4> : tensor<i32>
@@ -164,7 +164,7 @@
// CHECK: %[[TENSOR_CONSTANT:.+]] = stream.tensor.export %[[EXTERNAL_CONSTANT]] : tensor<i32> in !stream.resource<external>{%[[CONSTANT_SIZE]]} -> tensor<i32>
// CHECK: check.expect_eq(%[[TENSOR_RESULT]], %[[TENSOR_CONSTANT]]) : tensor<i32>
check.expect_eq(%1, %cst) : tensor<i32>
- return
+ util.return
}
// -----
@@ -174,10 +174,10 @@
// CHECK-LABEL: unrealizedCastCleanup
// CHECK-SAME: (%[[COND:.+]]: i1, %[[LHS:.+]]: !stream.resource<*>, %[[LHS_SIZE:.+]]: index, %[[RHS:.+]]: !stream.resource<*>, %[[RHS_SIZE:.+]]: index) -> (!stream.resource<*>, index)
-func.func @unrealizedCastCleanup(%cond: i1, %lhs: tensor<1024xf32>, %rhs: tensor<1024xf32>) -> tensor<1024xf32> {
+util.func public @unrealizedCastCleanup(%cond: i1, %lhs: tensor<1024xf32>, %rhs: tensor<1024xf32>) -> tensor<1024xf32> {
// CHECK-DAG: %[[RET:.+]] = arith.select %[[COND]], %[[LHS]], %[[RHS]] : !stream.resource<*>
// CHECK-DAG: %[[RET_SIZE:.+]] = arith.select %[[COND]], %[[LHS_SIZE]], %[[RHS_SIZE]] : index
%0 = arith.select %cond, %lhs, %rhs : tensor<1024xf32>
- // CHECK: return %[[RET]], %[[RET_SIZE]]
- return %0 : tensor<1024xf32>
+ // CHECK: util.return %[[RET]], %[[RET_SIZE]]
+ util.return %0 : tensor<1024xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
index 1e960f8..1507326 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
@@ -49,7 +49,7 @@
stream.executable private @func_a_ex_0 {
stream.executable.export public @dispatch_0
builtin.module {
- func.func @dispatch_0(%arg0: !stream.binding {stream.alignment = 32 : index}, %arg1: !stream.binding {stream.alignment = 32 : index}, %arg2: !stream.binding {stream.alignment = 32 : index}) {
+ util.func public @dispatch_0(%arg0: !stream.binding {stream.alignment = 32 : index}, %arg1: !stream.binding {stream.alignment = 32 : index}, %arg2: !stream.binding {stream.alignment = 32 : index}) {
%c4 = arith.constant 4 : index
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4xi32>>
@@ -72,7 +72,7 @@
} -> tensor<?xi32>
flow.dispatch.tensor.store %9, %2, offsets = [%arg3], sizes = [%5], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<4xi32>>
}
- return
+ util.return
}
}
}
@@ -80,7 +80,7 @@
stream.executable private @func_a_ex_1 {
stream.executable.export public @dispatch_1
builtin.module {
- func.func @dispatch_1(%arg0: !stream.binding {stream.alignment = 32 : index}, %arg1: !stream.binding {stream.alignment = 32 : index}, %arg2: !stream.binding {stream.alignment = 32 : index}) {
+ util.func public @dispatch_1(%arg0: !stream.binding {stream.alignment = 32 : index}, %arg1: !stream.binding {stream.alignment = 32 : index}, %arg2: !stream.binding {stream.alignment = 32 : index}) {
%c3 = arith.constant 3 : index
%c0 = arith.constant 0 : index
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<3xi32>>
@@ -103,12 +103,12 @@
} -> tensor<?xi32>
flow.dispatch.tensor.store %9, %2, offsets = [%arg3], sizes = [%5], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<3xi32>>
}
- return
+ util.return
}
}
}
-func.func public @func_a() -> (tensor<4xi32>, tensor<4xi32>) {
+util.func public @func_a() -> (tensor<4xi32>, tensor<4xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
@@ -144,5 +144,5 @@
%5 = stream.tensor.export %4 : tensor<4xi32> in !stream.resource<external>{%c16} -> tensor<4xi32>
%6 = stream.timepoint.await %1 => %0 : !stream.resource<external>{%c16}
%7 = stream.tensor.export %6 : tensor<4xi32> in !stream.resource<external>{%c16} -> tensor<4xi32>
- return %5, %7 : tensor<4xi32>, tensor<4xi32>
+ util.return %5, %7 : tensor<4xi32>, tensor<4xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_async_copies.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_async_copies.mlir
index 9686177..c586855 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_async_copies.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_async_copies.mlir
@@ -5,7 +5,7 @@
// expects us to clean up.
// CHECK-LABEL: @multiUseTiedOperand
-func.func @multiUseTiedOperand(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @multiUseTiedOperand(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -22,8 +22,8 @@
%clone1 = stream.async.clone %splat : !stream.resource<*>{%size} -> !stream.resource<*>{%size}
// CHECK: %[[FILL1:.+]] = stream.async.fill %c789_i32, %[[SPLAT]]
%fill1 = stream.async.fill %c789_i32, %clone1[%c128 to %c256 for %c128] : i32 -> %3 as !stream.resource<*>{%size}
- // CHECK: return %[[FILL0]], %[[FILL1]]
- return %fill0, %fill1 : !stream.resource<*>, !stream.resource<*>
+ // CHECK: util.return %[[FILL0]], %[[FILL1]]
+ util.return %fill0, %fill1 : !stream.resource<*>, !stream.resource<*>
}
// -----
@@ -34,7 +34,7 @@
// CHECK-LABEL: @argMoveCallee
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<*>
-func.func private @argMoveCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
+util.func private @argMoveCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -42,16 +42,16 @@
%clone = stream.async.clone %arg : !stream.resource<*>{%size} -> !stream.resource<*>{%size}
// CHECK: %[[FILL:.+]] = stream.async.fill %c123_i32, %[[ARG0]]
%fill = stream.async.fill %c123_i32, %clone[%c0 to %c128 for %c128] : i32 -> %0 as !stream.resource<*>{%size}
- // CHECK: return %[[FILL]]
- return %fill : !stream.resource<*>
+ // CHECK: util.return %[[FILL]]
+ util.return %fill : !stream.resource<*>
}
// CHECK: @argMoveCaller
-func.func @argMoveCaller(%size: index) -> !stream.resource<*> {
+util.func public @argMoveCaller(%size: index) -> !stream.resource<*> {
%c123_i32 = arith.constant 123 : i32
// CHECK: stream.async.splat
%splat = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%size}
- %result = call @argMoveCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
- return %result : !stream.resource<*>
+ %result = util.call @argMoveCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
+ util.return %result : !stream.resource<*>
}
// -----
@@ -60,7 +60,7 @@
// call and passed by const-reference.
// CHECK-LABEL: @argCopyCallee
-func.func private @argCopyCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
+util.func private @argCopyCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -68,15 +68,15 @@
%clone = stream.async.clone %arg : !stream.resource<*>{%size} -> !stream.resource<*>{%size}
// CHECK: stream.async.fill
%fill = stream.async.fill %c123_i32, %clone[%c0 to %c128 for %c128] : i32 -> %0 as !stream.resource<*>{%size}
- return %fill : !stream.resource<*>
+ util.return %fill : !stream.resource<*>
}
// CHECK: @argCopyCaller
-func.func @argCopyCaller(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @argCopyCaller(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c123_i32 = arith.constant 123 : i32
// CHECK: stream.async.splat
%splat = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%size}
- %result = call @argCopyCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
- return %splat, %result : !stream.resource<*>, !stream.resource<*>
+ %result = util.call @argCopyCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
+ util.return %splat, %result : !stream.resource<*>, !stream.resource<*>
}
// -----
@@ -90,7 +90,7 @@
// CHECK-LABEL: @blockArgMove
// CHECK-SAME: (%[[COND:.+]]: i1
-func.func private @blockArgMove(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func private @blockArgMove(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -118,5 +118,5 @@
cf.cond_br %cond, ^bb1(%fill0, %bb1_1_new : !stream.resource<*>, !stream.resource<*>),
^bb2(%fill0, %bb1_1_new : !stream.resource<*>, !stream.resource<*>)
^bb2(%bb2_0: !stream.resource<*>, %bb2_1: !stream.resource<*>):
- return %bb2_0, %bb2_1 : !stream.resource<*>, !stream.resource<*>
+ util.return %bb2_0, %bb2_1 : !stream.resource<*>, !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_coverage.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_coverage.mlir
index c21c390..30aae28 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_coverage.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_coverage.mlir
@@ -16,15 +16,15 @@
}
// CHECK-LABEL: @initializedGlobals
-func.func private @initializedGlobals() -> !stream.timepoint {
+util.func private @initializedGlobals() -> !stream.timepoint {
// CHECK: %[[GLOBAL0:.+]] = util.global.load @global0
%global0 = util.global.load @global0 : !stream.timepoint
// CHECK: %[[GLOBAL1:.+]] = util.global.load @global1
%global1 = util.global.load @global1 : !stream.timepoint
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[GLOBAL0]], %[[GLOBAL1]])
%join = stream.timepoint.join max(%global0, %global1) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -32,11 +32,11 @@
// Tests that meaningful timeline ops are never marked immediate.
// CHECK-LABEL: @nonImmediate
-func.func private @nonImmediate() -> !stream.timepoint {
+util.func private @nonImmediate() -> !stream.timepoint {
// CHECK: %[[EXECUTE:.+]] = stream.cmd.execute
%0 = stream.cmd.execute with() {} => !stream.timepoint
- // CHECK: return %[[EXECUTE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[EXECUTE]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -45,7 +45,7 @@
// by both %exec1a and %exec1b and does not need to be joined.
// CHECK-LABEL: @joinChained
-func.func @joinChained() -> !stream.timepoint {
+util.func public @joinChained() -> !stream.timepoint {
// CHECK: %[[EXEC0:.+]] = stream.cmd.execute with
%exec0 = stream.cmd.execute with() {} => !stream.timepoint
// CHECK: %[[EXEC1A:.+]] = stream.cmd.execute await(%[[EXEC0]])
@@ -55,8 +55,8 @@
// CHECK: %[[EXEC0_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[EXEC0_IMM]], %[[EXEC1A]], %[[EXEC1B]])
%join = stream.timepoint.join max(%exec0, %exec1a, %exec1b) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -66,7 +66,7 @@
// CHECK-LABEL: @selectCovered
// CHECK-SAME: (%[[COND:.+]]: i1)
-func.func @selectCovered(%cond: i1) -> !stream.timepoint {
+util.func public @selectCovered(%cond: i1) -> !stream.timepoint {
// CHECK: %[[EXEC0:.+]] = stream.cmd.execute
%exec0 = stream.cmd.execute with() {} => !stream.timepoint
// CHECK: %[[EXEC1A:.+]] = stream.cmd.execute await(%[[EXEC0]])
@@ -78,8 +78,8 @@
// CHECK: %[[EXEC0_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[EXEC0_IMM]], %[[SELECT]])
%join = stream.timepoint.join max(%exec0, %select) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -87,25 +87,25 @@
// Tests that a timepoint passed along a call edge is propagated.
// %t0/%t1 are covered by the call result %call that joins the two together.
-// CHECK-LABEL: func @caller
+// CHECK-LABEL: util.func public @caller
// CHECK-SAME: (%[[T0:.+]]: !stream.timepoint, %[[T1:.+]]: !stream.timepoint)
-func.func @caller(%t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
- // CHECK: %[[CALL:.+]] = call @callee(%[[T0]], %[[T1]])
- %call = call @callee(%t0, %t1) : (!stream.timepoint, !stream.timepoint) -> !stream.timepoint
+util.func public @caller(%t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
+ // CHECK: %[[CALL:.+]] = util.call @callee(%[[T0]], %[[T1]])
+ %call = util.call @callee(%t0, %t1) : (!stream.timepoint, !stream.timepoint) -> !stream.timepoint
// CHECK-DAG: %[[T0_COVERED:.+]] = stream.timepoint.immediate
// CHECK-DAG: %[[T1_COVERED:.+]] = stream.timepoint.immediate
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_COVERED]], %[[T1_COVERED]], %[[CALL]])
%join = stream.timepoint.join max(%t0, %t1, %call) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
-// CHECK-LABEL: func private @callee
-func.func private @callee(%t0a: !stream.timepoint, %t0b: !stream.timepoint) -> !stream.timepoint {
+// CHECK-LABEL: util.func private @callee
+util.func private @callee(%t0a: !stream.timepoint, %t0b: !stream.timepoint) -> !stream.timepoint {
// CHECK-NOT: stream.timepoint.immediate
// CHECK: %[[JOIN_CALLEE:.+]] = stream.timepoint.join max
%t1 = stream.timepoint.join max(%t0a, %t0b) => !stream.timepoint
- // CHECK: return %[[JOIN_CALLEE]]
- return %t1 : !stream.timepoint
+ // CHECK: util.return %[[JOIN_CALLEE]]
+ util.return %t1 : !stream.timepoint
}
// -----
@@ -117,18 +117,18 @@
// the same and instead just handle coverage (hitting either call results is
// the same as hitting the original arg).
-// CHECK-LABEL: func @callerDupes
-func.func @callerDupes(%unknown: !stream.timepoint) -> !stream.timepoint {
- // CHECK: %[[CALL:.+]]:2 = call @calleeDupes
- %call:2 = call @calleeDupes(%unknown, %unknown) : (!stream.timepoint, !stream.timepoint) -> (!stream.timepoint, !stream.timepoint)
+// CHECK-LABEL: util.func public @callerDupes
+util.func public @callerDupes(%unknown: !stream.timepoint) -> !stream.timepoint {
+ // CHECK: %[[CALL:.+]]:2 = util.call @calleeDupes
+ %call:2 = util.call @calleeDupes(%unknown, %unknown) : (!stream.timepoint, !stream.timepoint) -> (!stream.timepoint, !stream.timepoint)
// CHECK-NEXT: %[[UNKNOWN_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[UNKNOWN_IMM]], %[[CALL]]#0, %[[CALL]]#1)
%join = stream.timepoint.join max(%unknown, %call#0, %call#1) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
-func.func private @calleeDupes(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> (!stream.timepoint, !stream.timepoint) {
- return %arg0, %arg1 : !stream.timepoint, !stream.timepoint
+util.func private @calleeDupes(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> (!stream.timepoint, !stream.timepoint) {
+ util.return %arg0, %arg1 : !stream.timepoint, !stream.timepoint
}
// -----
@@ -140,24 +140,24 @@
// TODO(benvanik): we should also be able to trim the calls/t1 and only use
// %t01 but that needs some work to know that call0 == t0 and call1 == t01.
-// CHECK-LABEL: func @nonUniformCaller
+// CHECK-LABEL: util.func public @nonUniformCaller
// CHECK-SAME: (%[[T0:.+]]: !stream.timepoint, %[[T1:.+]]: !stream.timepoint)
-func.func @nonUniformCaller(%t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
- // CHECK: %[[CALL0:.+]] = call @nonUniformCallee(%[[T0]])
- %call0 = call @nonUniformCallee(%t0) : (!stream.timepoint) -> !stream.timepoint
+util.func public @nonUniformCaller(%t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
+ // CHECK: %[[CALL0:.+]] = util.call @nonUniformCallee(%[[T0]])
+ %call0 = util.call @nonUniformCallee(%t0) : (!stream.timepoint) -> !stream.timepoint
// CHECK: %[[T01:.+]] = stream.timepoint.join max(%[[T0]], %[[T1]])
%t01 = stream.timepoint.join max(%t0, %t1) => !stream.timepoint
- // CHECK: %[[CALL1:.+]] = call @nonUniformCallee(%[[T01]])
- %call1 = call @nonUniformCallee(%t01) : (!stream.timepoint) -> !stream.timepoint
+ // CHECK: %[[CALL1:.+]] = util.call @nonUniformCallee(%[[T01]])
+ %call1 = util.call @nonUniformCallee(%t01) : (!stream.timepoint) -> !stream.timepoint
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[CALL0]], %[[T1]], %[[CALL1]])
%join = stream.timepoint.join max(%t0, %call0, %t1, %call1) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
-// CHECK: func private @nonUniformCallee
-func.func private @nonUniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
- return %arg0 : !stream.timepoint
+// CHECK: util.func private @nonUniformCallee
+util.func private @nonUniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
+ util.return %arg0 : !stream.timepoint
}
// -----
@@ -165,9 +165,9 @@
// Tests that timepoints are tracked through branches args.
// In this simple case %bb1_t0 always covers %t0.
-// CHECK-LABEL: func @branch
+// CHECK-LABEL: util.func public @branch
// CHECK-SAME: (%[[T0:.+]]: !stream.timepoint)
-func.func @branch(%t0: !stream.timepoint) -> !stream.timepoint {
+util.func public @branch(%t0: !stream.timepoint) -> !stream.timepoint {
// CHECK: cf.br ^bb1
cf.br ^bb1(%t0 : !stream.timepoint)
// CHECK-NEXT: ^bb1(%[[BB1_T0:.+]]: !stream.timepoint)
@@ -175,8 +175,8 @@
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[BB1_T0]])
%join = stream.timepoint.join max(%t0, %bb1_t0) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -184,9 +184,9 @@
// Tests that forward edges with convergent timepoints track coverage.
// Here both true and false paths cover %t0 and it can be elided at the join.
-// CHECK-LABEL: func @branchConvergentForwardEdge
+// CHECK-LABEL: util.func public @branchConvergentForwardEdge
// CHECK-SAME: (%[[COND:.+]]: i1, %[[T0:.+]]: !stream.timepoint)
-func.func @branchConvergentForwardEdge(%cond: i1, %t0: !stream.timepoint) -> !stream.timepoint {
+util.func public @branchConvergentForwardEdge(%cond: i1, %t0: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[T1A:.+]] = stream.cmd.execute await(%[[T0]])
%t1a = stream.cmd.execute await(%t0) => with() {} => !stream.timepoint
// CHECK: %[[T1B:.+]] = stream.cmd.execute await(%[[T0]])
@@ -200,8 +200,8 @@
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[BB1_ARG]])
%join = stream.timepoint.join max(%t0, %bb1_arg) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -209,9 +209,9 @@
// Tests that forward edges with divergent timepoint coverage get propagated.
// %t0 is covered on both paths but %t1 is only covered when %cond == true.
-// CHECK-LABEL: func @branchDivergentForwardEdge
+// CHECK-LABEL: util.func public @branchDivergentForwardEdge
// CHECK-SAME: (%[[COND:.+]]: i1, %[[T0:.+]]: !stream.timepoint, %[[T1:.+]]: !stream.timepoint)
-func.func @branchDivergentForwardEdge(%cond: i1, %t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
+util.func public @branchDivergentForwardEdge(%cond: i1, %t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[T01:.+]] = stream.timepoint.join max(%[[T0]], %[[T1]])
%t01 = stream.timepoint.join max(%t0, %t1) => !stream.timepoint
// CHECK-NEXT: cf.cond_br
@@ -223,8 +223,8 @@
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[T1]], %[[BB1_ARG]])
%join = stream.timepoint.join max(%t0, %t1, %bb1_arg) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -236,9 +236,9 @@
// must-be-executed-context-like machinery in order to do so. We just want to
// make sure we're preserving the timepoints here for correctness.
-// CHECK-LABEL: func @branchDivergentBackEdge
+// CHECK-LABEL: util.func public @branchDivergentBackEdge
// CHECK-SAME: (%[[COND:.+]]: i1, %[[T0:.+]]: !stream.timepoint)
-func.func @branchDivergentBackEdge(%cond: i1, %t0: !stream.timepoint) -> !stream.timepoint {
+util.func public @branchDivergentBackEdge(%cond: i1, %t0: !stream.timepoint) -> !stream.timepoint {
// CHECK: cf.br ^bb1
cf.br ^bb1(%cond, %t0 : i1, !stream.timepoint)
// CHECK-NEXT: ^bb1(%[[BB1_COND:.+]]: i1, %[[BB1_T0:.+]]: !stream.timepoint)
@@ -256,8 +256,8 @@
^bb2(%bb2_t1: !stream.timepoint):
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0]], %[[BB2_T1]])
%join = stream.timepoint.join max(%t0, %bb2_t1) => !stream.timepoint
- // CHECK-NEXT: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK-NEXT: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// -----
@@ -265,9 +265,9 @@
// Tests that scf.if regions with convergent yields are handled.
// Here %t0 is covered regardless of the %cond and can be elided.
-// CHECK-LABEL: func @scfIfConvergent
+// CHECK-LABEL: util.func public @scfIfConvergent
// CHECK-SAME: (%[[COND:.+]]: i1, %[[T0:.+]]: !stream.timepoint, %[[T1:.+]]: !stream.timepoint)
-func.func @scfIfConvergent(%cond: i1, %t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
+util.func public @scfIfConvergent(%cond: i1, %t0: !stream.timepoint, %t1: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[IF:.+]] = scf.if
%if = scf.if %cond -> !stream.timepoint {
// CHECK: yield %[[T0]]
@@ -281,8 +281,8 @@
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[JOIN:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[T1]], %[[IF]])
%join = stream.timepoint.join max(%t0, %t1, %if) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %join : !stream.timepoint
}
// TODO(benvanik): support scf.for
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_immediate.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_immediate.mlir
index 147c1c6..aa058c5 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_immediate.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/elide_timepoints_immediate.mlir
@@ -3,14 +3,14 @@
// Tests that joins with multiple immediate timepoints are marked as immediate.
// CHECK-LABEL: @immediateJoin
-func.func private @immediateJoin() -> !stream.timepoint {
+util.func private @immediateJoin() -> !stream.timepoint {
%imm0 = stream.timepoint.immediate => !stream.timepoint
%imm1 = stream.timepoint.immediate => !stream.timepoint
// CHECK: stream.timepoint.join
// CHECK-NEXT: %[[JOIN_IMM:.+]] = stream.timepoint.immediate
%0 = stream.timepoint.join max(%imm0, %imm1) => !stream.timepoint
- // CHECK: return %[[JOIN_IMM]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[JOIN_IMM]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -19,13 +19,13 @@
// CHECK-LABEL: @nonImmediateJoin
// CHECK-SAME: (%[[NON_IMM:.+]]: !stream.timepoint)
-func.func @nonImmediateJoin(%arg0: !stream.timepoint) -> !stream.timepoint {
+util.func public @nonImmediateJoin(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[NON_IMM]], %[[IMM]])
%0 = stream.timepoint.join max(%arg0, %imm) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -33,14 +33,14 @@
// Tests that a select between immediate values is marked immediate.
// CHECK-LABEL: @selectSame
-func.func @selectSame(%cond: i1) -> !stream.timepoint {
+util.func public @selectSame(%cond: i1) -> !stream.timepoint {
%imm0 = stream.timepoint.immediate => !stream.timepoint
%imm1 = stream.timepoint.immediate => !stream.timepoint
// CHECK: arith.select
// CHECK-NEXT: %[[SELECT_IMM:.+]] = stream.timepoint.immediate
%select = arith.select %cond, %imm0, %imm1 : !stream.timepoint
- // CHECK: return %[[SELECT_IMM]]
- return %select : !stream.timepoint
+ // CHECK: util.return %[[SELECT_IMM]]
+ util.return %select : !stream.timepoint
}
// -----
@@ -48,12 +48,12 @@
// Tests that a select with one or more unknown value is not marked immediate.
// CHECK-LABEL: @selectDifferent
-func.func @selectDifferent(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
+util.func public @selectDifferent(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK: %[[SELECT:.+]] = arith.select
%select = arith.select %cond, %imm, %unknown : !stream.timepoint
- // CHECK: return %[[SELECT]]
- return %select : !stream.timepoint
+ // CHECK: util.return %[[SELECT]]
+ util.return %select : !stream.timepoint
}
// -----
@@ -63,11 +63,11 @@
util.global private mutable @global = #stream.timepoint<immediate> : !stream.timepoint
// CHECK-LABEL: @immediateGlobal
-func.func private @immediateGlobal() -> !stream.timepoint {
+util.func private @immediateGlobal() -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%global = util.global.load @global : !stream.timepoint
- // CHECK: return %[[IMM]]
- return %global : !stream.timepoint
+ // CHECK: util.return %[[IMM]]
+ util.return %global : !stream.timepoint
}
// -----
@@ -77,19 +77,19 @@
util.global private mutable @global : !stream.timepoint
// CHECK-LABEL: @uniformGlobal
-func.func private @uniformGlobal() -> !stream.timepoint {
+util.func private @uniformGlobal() -> !stream.timepoint {
%imm = stream.timepoint.immediate => !stream.timepoint
util.global.store %imm, @global : !stream.timepoint
// CHECK: util.global.load
%global = util.global.load @global : !stream.timepoint
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
- // CHECK: return %[[IMM]]
- return %global : !stream.timepoint
+ // CHECK: util.return %[[IMM]]
+ util.return %global : !stream.timepoint
}
-func.func private @globalSetter() {
+util.func private @globalSetter() {
%imm = stream.timepoint.immediate => !stream.timepoint
util.global.store %imm, @global : !stream.timepoint
- return
+ util.return
}
// -----
@@ -99,16 +99,16 @@
util.global private mutable @global = #stream.timepoint<immediate> : !stream.timepoint
// CHECK-LABEL: @nonUniformGlobal
-func.func private @nonUniformGlobal() -> !stream.timepoint {
+util.func private @nonUniformGlobal() -> !stream.timepoint {
// CHECK-NOT: stream.timepoint.immediate
// CHECK: %[[GLOBAL:.+]] = util.global.load @global
%global = util.global.load @global : !stream.timepoint
- // CHECK: return %[[GLOBAL]]
- return %global : !stream.timepoint
+ // CHECK: util.return %[[GLOBAL]]
+ util.return %global : !stream.timepoint
}
-func.func @globalSetter(%arg0: !stream.timepoint) {
+util.func public @globalSetter(%arg0: !stream.timepoint) {
util.global.store %arg0, @global : !stream.timepoint
- return
+ util.return
}
// -----
@@ -116,92 +116,92 @@
// Tests that meaningful timeline ops are never marked immediate.
// CHECK-LABEL: @nonImmediate
-func.func private @nonImmediate() -> !stream.timepoint {
+util.func private @nonImmediate() -> !stream.timepoint {
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK: %[[EXECUTE:.+]] = stream.cmd.execute
%0 = stream.cmd.execute await(%imm) => with() {} => !stream.timepoint
- // CHECK: return %[[EXECUTE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[EXECUTE]]
+ util.return %0 : !stream.timepoint
}
// -----
// Tests that an immediate timepoint passed along a call edge is propagated.
-// CHECK-LABEL: func @caller
-func.func @caller() -> !stream.timepoint {
+// CHECK-LABEL: util.func public @caller
+util.func public @caller() -> !stream.timepoint {
// CHECK: %[[T0_IMM:.+]] = stream.timepoint.immediate
%t0 = stream.timepoint.immediate => !stream.timepoint
- // CHECK: %[[T1:.+]] = call @callee(%[[T0_IMM]], %[[T0_IMM]])
+ // CHECK: %[[T1:.+]] = util.call @callee(%[[T0_IMM]], %[[T0_IMM]])
// CHECK-NEXT: %[[T1_IMM:.+]] = stream.timepoint.immediate
- %t1 = call @callee(%t0, %t0) : (!stream.timepoint, !stream.timepoint) -> !stream.timepoint
+ %t1 = util.call @callee(%t0, %t0) : (!stream.timepoint, !stream.timepoint) -> !stream.timepoint
// CHECK: %[[T2:.+]] = stream.timepoint.join max(%[[T0_IMM]], %[[T1_IMM]])
// CHECK-NEXT: %[[T2_IMM:.+]] = stream.timepoint.immediate
%t2 = stream.timepoint.join max(%t0, %t1) => !stream.timepoint
- // CHECK: return %[[T2_IMM]]
- return %t2 : !stream.timepoint
+ // CHECK: util.return %[[T2_IMM]]
+ util.return %t2 : !stream.timepoint
}
-// CHECK-LABEL: func private @callee
-func.func private @callee(%t0a: !stream.timepoint, %t0b: !stream.timepoint) -> !stream.timepoint {
+// CHECK-LABEL: util.func private @callee
+util.func private @callee(%t0a: !stream.timepoint, %t0b: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[T0A_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[T0B_IMM:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[T1:.+]] = stream.timepoint.join max(%[[T0A_IMM]], %[[T0B_IMM]])
%t1 = stream.timepoint.join max(%t0a, %t0b) => !stream.timepoint
// CHECK-NEXT: %[[T1_IMM:.+]] = stream.timepoint.immediate
- // CHECK-NEXT: return %[[T1_IMM]]
- return %t1 : !stream.timepoint
+ // CHECK-NEXT: util.return %[[T1_IMM]]
+ util.return %t1 : !stream.timepoint
}
// -----
// Tests that duplicate call args/results are handled correctly.
-// CHECK-LABEL: func @callerDupes
-func.func @callerDupes() -> !stream.timepoint {
+// CHECK-LABEL: util.func public @callerDupes
+util.func public @callerDupes() -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
- // CHECK: %[[CALL:.+]]:2 = call @calleeDupes
+ // CHECK: %[[CALL:.+]]:2 = util.call @calleeDupes
// CHECK-NEXT: %[[CALL_IMM0:.+]] = stream.timepoint.immediate
// CHECK-NEXT: %[[CALL_IMM1:.+]] = stream.timepoint.immediate
- %call:2 = call @calleeDupes(%imm, %imm) : (!stream.timepoint, !stream.timepoint) -> (!stream.timepoint, !stream.timepoint)
+ %call:2 = util.call @calleeDupes(%imm, %imm) : (!stream.timepoint, !stream.timepoint) -> (!stream.timepoint, !stream.timepoint)
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[CALL_IMM0]], %[[CALL_IMM1]])
// CHECK-NEXT: %[[JOIN_IMM:.+]] = stream.timepoint.immediate
%join = stream.timepoint.join max(%call#0, %call#1) => !stream.timepoint
- // CHECK: return %[[JOIN_IMM]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN_IMM]]
+ util.return %join : !stream.timepoint
}
-func.func private @calleeDupes(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> (!stream.timepoint, !stream.timepoint) {
- return %arg0, %arg1 : !stream.timepoint, !stream.timepoint
+util.func private @calleeDupes(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> (!stream.timepoint, !stream.timepoint) {
+ util.return %arg0, %arg1 : !stream.timepoint, !stream.timepoint
}
// -----
// Tests that convergent caller timepoints are handled correctly.
-// CHECK-LABEL: func @uniformCaller
-func.func @uniformCaller() -> !stream.timepoint {
+// CHECK-LABEL: util.func public @uniformCaller
+util.func public @uniformCaller() -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK-NEXT: call @uniformCallee(%[[IMM]])
// CHECK-NEXT: %[[CALL_IMM0:.+]] = stream.timepoint.immediate
- %call0 = call @uniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
+ %call0 = util.call @uniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
// CHECK-NEXT: call @uniformCallee(%[[IMM]])
// CHECK-NEXT: %[[CALL_IMM1:.+]] = stream.timepoint.immediate
- %call1 = call @uniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
+ %call1 = util.call @uniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
// CHECK-NEXT: %[[CALLER_JOIN:.+]] = stream.timepoint.join max(%[[CALL_IMM0]], %[[CALL_IMM1]])
// CHECK-NEXT: %[[CALLER_JOIN_IMM:.+]] = stream.timepoint.immediate
%join = stream.timepoint.join max(%call0, %call1) => !stream.timepoint
- // CHECK: return %[[CALLER_JOIN_IMM]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[CALLER_JOIN_IMM]]
+ util.return %join : !stream.timepoint
}
-// CHECK: func private @uniformCallee
-func.func private @uniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
+// CHECK: util.func private @uniformCallee
+util.func private @uniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[ARG0_IMM:.+]] = stream.timepoint.immediate
// CHECK: %[[CALLEE_JOIN:.+]] = stream.timepoint.join max(%[[ARG0_IMM]])
// CHECK-NEXT: %[[CALLEE_JOIN_IMM:.+]] = stream.timepoint.immediate
%0 = stream.timepoint.join max(%arg0) => !stream.timepoint
- // CHECK: return %[[CALLEE_JOIN_IMM]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[CALLEE_JOIN_IMM]]
+ util.return %0 : !stream.timepoint
}
// -----
@@ -211,37 +211,37 @@
// should be immediate - today, though, we aggregate over callers and any one
// that may pass a non-immediate poisons the analysis.
-// CHECK-LABEL: func @nonUniformCaller
+// CHECK-LABEL: util.func public @nonUniformCaller
// CHECK-SAME: (%[[UNKNOWN:.+]]: !stream.timepoint)
-func.func @nonUniformCaller(%unknown: !stream.timepoint) -> !stream.timepoint {
+util.func public @nonUniformCaller(%unknown: !stream.timepoint) -> !stream.timepoint {
// CHECK-NOT: stream.timepoint.immediate
- // CHECK: %[[CALL0:.+]] = call @nonUniformCallee(%[[UNKNOWN]])
- %call0 = call @nonUniformCallee(%unknown) : (!stream.timepoint) -> !stream.timepoint
+ // CHECK: %[[CALL0:.+]] = util.call @nonUniformCallee(%[[UNKNOWN]])
+ %call0 = util.call @nonUniformCallee(%unknown) : (!stream.timepoint) -> !stream.timepoint
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
- // CHECK: %[[CALL1:.+]] = call @nonUniformCallee(%[[IMM]])
- %call1 = call @nonUniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
+ // CHECK: %[[CALL1:.+]] = util.call @nonUniformCallee(%[[IMM]])
+ %call1 = util.call @nonUniformCallee(%imm) : (!stream.timepoint) -> !stream.timepoint
// CHECK: %[[CALLER_JOIN:.+]] = stream.timepoint.join max(%[[CALL0]], %[[CALL1]])
%join = stream.timepoint.join max(%call0, %call1) => !stream.timepoint
- // CHECK: return %[[CALLER_JOIN]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[CALLER_JOIN]]
+ util.return %join : !stream.timepoint
}
-// CHECK-LABEL: func private @nonUniformCallee
+// CHECK-LABEL: util.func private @nonUniformCallee
// CHECK-SAME: (%[[CALLEE_ARG:.+]]: !stream.timepoint)
-func.func private @nonUniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
+util.func private @nonUniformCallee(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK-NOT: stream.timepoint.immediate
// CHECK: %[[CALLEE_JOIN:.+]] = stream.timepoint.join max(%[[CALLEE_ARG]])
%0 = stream.timepoint.join max(%arg0) => !stream.timepoint
- // CHECK: return %[[CALLEE_JOIN]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[CALLEE_JOIN]]
+ util.return %0 : !stream.timepoint
}
// -----
// Tests that an immediate timepoint passed along a block edge is propagated.
-// CHECK-LABEL: func @branch
-func.func @branch() -> !stream.timepoint {
+// CHECK-LABEL: util.func public @branch
+util.func public @branch() -> !stream.timepoint {
%t0 = stream.timepoint.immediate => !stream.timepoint
// CHECK: cf.br ^bb1
cf.br ^bb1(%t0 : !stream.timepoint)
@@ -251,8 +251,8 @@
// CHECK-NEXT: %[[T1:.+]] = stream.timepoint.join max(%[[BB1_T0_IMMEDIATE]])
%t1 = stream.timepoint.join max(%bb1_t0) => !stream.timepoint
// CHECK-NEXT: %[[JOIN_IMMEDIATE:.+]] = stream.timepoint.immediate
- // CHECK-NEXT: return %[[JOIN_IMMEDIATE]]
- return %t1 : !stream.timepoint
+ // CHECK-NEXT: util.return %[[JOIN_IMMEDIATE]]
+ util.return %t1 : !stream.timepoint
}
// -----
@@ -260,8 +260,8 @@
// Tests that forward edges with convergently immediate timepoints get
// propagated.
-// CHECK-LABEL: func @branchConvergentForwardEdge
-func.func @branchConvergentForwardEdge(%cond: i1) -> !stream.timepoint {
+// CHECK-LABEL: util.func public @branchConvergentForwardEdge
+util.func public @branchConvergentForwardEdge(%cond: i1) -> !stream.timepoint {
// CHECK: %[[IMM0:.+]] = stream.timepoint.immediate
%imm0 = stream.timepoint.immediate => !stream.timepoint
// CHECK: %[[IMM1:.+]] = stream.timepoint.immediate
@@ -273,17 +273,17 @@
// CHECK-NEXT: ^bb1(%[[BB1_ARG:.+]]: !stream.timepoint)
^bb1(%bb1_arg: !stream.timepoint):
// CHECK: %[[BB1_IMM:.+]] = stream.timepoint.immediate
- // CHECK: return %[[BB1_IMM]]
- return %bb1_arg : !stream.timepoint
+ // CHECK: util.return %[[BB1_IMM]]
+ util.return %bb1_arg : !stream.timepoint
}
// -----
// Tests that forward edges with divergent timepoints don't get propagated.
-// CHECK-LABEL: func @branchDivergentForwardEdge
+// CHECK-LABEL: util.func public @branchDivergentForwardEdge
// CHECK-SAME: (%[[COND:.+]]: i1, %[[UNKNOWN:.+]]: !stream.timepoint)
-func.func @branchDivergentForwardEdge(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
+util.func public @branchDivergentForwardEdge(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK-NEXT: cf.cond_br %[[COND]]
@@ -292,16 +292,16 @@
cf.cond_br %cond, ^bb1(%unknown : !stream.timepoint), ^bb1(%imm : !stream.timepoint)
// CHECK-NEXT: ^bb1(%[[BB1_ARG:.+]]: !stream.timepoint)
^bb1(%bb1_arg: !stream.timepoint):
- // CHECK: return %[[BB1_ARG]]
- return %bb1_arg : !stream.timepoint
+ // CHECK: util.return %[[BB1_ARG]]
+ util.return %bb1_arg : !stream.timepoint
}
// -----
// Tests that back edges with divergent timepoints don't get propagated.
-// CHECK-LABEL: func @branchDivergentBackEdge
-func.func @branchDivergentBackEdge(%cond: i1) -> !stream.timepoint {
+// CHECK-LABEL: util.func public @branchDivergentBackEdge
+util.func public @branchDivergentBackEdge(%cond: i1) -> !stream.timepoint {
%t0 = stream.timepoint.immediate => !stream.timepoint
// CHECK: cf.br ^bb1
cf.br ^bb1(%cond, %t0 : i1, !stream.timepoint)
@@ -318,17 +318,17 @@
cf.cond_br %bb1_cond, ^bb1(%cond_false, %bb1_t1 : i1, !stream.timepoint), ^bb2(%bb1_t1 : !stream.timepoint)
// CHECK-NEXT: ^bb2(%[[BB2_T1:.+]]: !stream.timepoint)
^bb2(%bb2_t1: !stream.timepoint):
- // CHECK-NEXT: return %[[BB2_T1]]
- return %bb2_t1 : !stream.timepoint
+ // CHECK-NEXT: util.return %[[BB2_T1]]
+ util.return %bb2_t1 : !stream.timepoint
}
// -----
// Tests that scf.if regions with convergent yields are handled.
-// CHECK-LABEL: func @scfIfConvergent
+// CHECK-LABEL: util.func public @scfIfConvergent
// CHECK-SAME: (%[[COND:.+]]: i1)
-func.func @scfIfConvergent(%cond: i1) -> !stream.timepoint {
+util.func public @scfIfConvergent(%cond: i1) -> !stream.timepoint {
// CHECK: %[[IF:.+]] = scf.if
%if = scf.if %cond -> !stream.timepoint {
// CHECK: %[[IMM0:.+]] = stream.timepoint.immediate
@@ -345,17 +345,17 @@
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[IF_IMM]])
// CHECK-NEXT: %[[JOIN_IMM:.+]] = stream.timepoint.immediate
%join = stream.timepoint.join max(%if) => !stream.timepoint
- // CHECK: return %[[JOIN_IMM]]
- return %join : !stream.timepoint
+ // CHECK: util.return %[[JOIN_IMM]]
+ util.return %join : !stream.timepoint
}
// -----
// Tests that scf.if regions with divergent yields are handled.
-// CHECK-LABEL: func @scfIfDivergent
+// CHECK-LABEL: util.func public @scfIfDivergent
// CHECK-SAME: (%[[COND:.+]]: i1, %[[UNKNOWN:.+]]: !stream.timepoint)
-func.func @scfIfDivergent(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
+util.func public @scfIfDivergent(%cond: i1, %unknown: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[IMM:.+]] = stream.timepoint.immediate
%imm = stream.timepoint.immediate => !stream.timepoint
// CHECK: %[[IF:.+]] = scf.if
@@ -371,8 +371,8 @@
// CHECK-NOT: stream.timepoint.immediate
// CHECK: %[[JOIN_OUTER:.+]] = stream.timepoint.join max(%[[UNKNOWN]], %[[IF]])
%join_outer = stream.timepoint.join max(%unknown, %0) => !stream.timepoint
- // CHECK: return %[[JOIN_OUTER]]
- return %join_outer : !stream.timepoint
+ // CHECK: util.return %[[JOIN_OUTER]]
+ util.return %join_outer : !stream.timepoint
}
// TODO(benvanik): support scf.for
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/emplace_allocations.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/emplace_allocations.mlir
index 58bfef1..3a10e50 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/emplace_allocations.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/emplace_allocations.mlir
@@ -1,9 +1,9 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-stream-emplace-allocations))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module( util.func(iree-stream-emplace-allocations))' %s | FileCheck %s
// Tests that a dispatch result is placed into the target of an update.
// CHECK-LABEL: @emplaceDispatch
-func.func @emplaceDispatch(
+util.func public @emplaceDispatch(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_OFFSET:arg[0-9]+]]: index, %[[UPDATE_SIZE:arg[0-9]+]]: index,
@@ -18,8 +18,8 @@
%update = stream.async.dispatch @ex::@dispatch(%input[%c0 to %input_size for %input_size]) : (!stream.resource<*>{%input_size}) -> !stream.resource<*>{%update_size}
// CHECK-NOT: stream.async.update
%result = stream.async.update %update, %target[%update_offset to %update_end] : !stream.resource<*>{%update_size} -> %target as !stream.resource<*>{%target_size}
- // CHECK: return %[[RESULT]]
- return %result : !stream.resource<*>
+ // CHECK: util.return %[[RESULT]]
+ util.return %result : !stream.resource<*>
}
// -----
@@ -29,7 +29,7 @@
// if the dispatch requires in-place operation that may not be safe.
// CHECK-LABEL: @dontEmplaceTiedDispatch
-func.func @dontEmplaceTiedDispatch(
+util.func public @dontEmplaceTiedDispatch(
%tied_input: !stream.resource<*>, %tied_input_size: index,
%update_offset: index, %update_size: index,
%target: !stream.resource<*>, %target_size: index) -> !stream.resource<*> {
@@ -39,8 +39,8 @@
%update = stream.async.dispatch @ex::@dispatch(%tied_input[%c0 to %tied_input_size for %tied_input_size]) : (!stream.resource<*>{%tied_input_size}) -> %tied_input{%tied_input_size}
// CHECK: %[[RESULT:.+]] = stream.async.update %[[TIED_RESULT]]
%result = stream.async.update %update, %target[%update_offset to %update_end] : !stream.resource<*>{%tied_input_size} -> %target as !stream.resource<*>{%target_size}
- // CHECK: return %[[RESULT]]
- return %result : !stream.resource<*>
+ // CHECK: util.return %[[RESULT]]
+ util.return %result : !stream.resource<*>
}
// -----
@@ -50,7 +50,7 @@
// test that explicitly as it's 95% of what this pass is designed to optimize.
// CHECK-LABEL: @emplaceDispatchSequence
-func.func @emplaceDispatchSequence(
+util.func public @emplaceDispatchSequence(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_SIZE:arg[0-9]+]]: index, %[[TARGET_SIZE:arg[0-9]+]]: index
@@ -79,8 +79,8 @@
%target2 = stream.async.update %update2, %target1[%c98304 to %c147456] : !stream.resource<*>{%update_size} -> %target1 as !stream.resource<*>{%target_size}
// CHECK-NOT: stream.async.update
%target3 = stream.async.update %update3, %target2[%c147456 to %c196608] : !stream.resource<*>{%update_size} -> %target2 as !stream.resource<*>{%target_size}
- // CHECK: return %[[TARGET3]]
- return %target3 : !stream.resource<*>
+ // CHECK: util.return %[[TARGET3]]
+ util.return %target3 : !stream.resource<*>
}
// -----
@@ -89,7 +89,7 @@
// dependencies shouldn't stop us from emplacing.
// CHECK-LABEL: @emplaceMultiResultDispatchSequence
-func.func @emplaceMultiResultDispatchSequence(
+util.func public @emplaceMultiResultDispatchSequence(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_SIZE:arg[0-9]+]]: index, %[[TARGET_SIZE:arg[0-9]+]]: index
@@ -118,8 +118,8 @@
%target2 = stream.async.update %update2, %target1[%c98304 to %c147456] : !stream.resource<*>{%update_size} -> %target1 as !stream.resource<*>{%target_size}
// CHECK-NOT: stream.async.update
%target3 = stream.async.update %update3, %target2[%c147456 to %c196608] : !stream.resource<*>{%update_size} -> %target2 as !stream.resource<*>{%target_size}
- // CHECK: return %[[TARGET3]]
- return %target3 : !stream.resource<*>
+ // CHECK: util.return %[[TARGET3]]
+ util.return %target3 : !stream.resource<*>
}
// -----
@@ -129,7 +129,7 @@
// into the same dispatch.
// CHECK-LABEL: @emplaceMultiResultDispatchInto
-func.func @emplaceMultiResultDispatchInto(
+util.func public @emplaceMultiResultDispatchInto(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_SIZE:arg[0-9]+]]: index, %[[TARGET_SIZE:arg[0-9]+]]: index
@@ -149,8 +149,8 @@
%target0 = stream.async.update %update#0, %target[%c0 to %c32] : !stream.resource<*>{%update_size} -> %target as !stream.resource<*>{%target_size}
// CHECK-NOT: stream.async.update
%target1 = stream.async.update %update#1, %target0[%c32 to %c64] : !stream.resource<*>{%update_size} -> %target0 as !stream.resource<*>{%target_size}
- // CHECK: return %[[DISPATCH]]#1
- return %target1 : !stream.resource<*>
+ // CHECK: util.return %[[DISPATCH]]#1
+ util.return %target1 : !stream.resource<*>
}
// -----
@@ -163,7 +163,7 @@
// third as the second isn't placed.
// CHECK-LABEL: @dontEmplaceSparseMultiResult
-func.func @dontEmplaceSparseMultiResult(
+util.func public @dontEmplaceSparseMultiResult(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_SIZE:arg[0-9]+]]: index, %[[TARGET_SIZE:arg[0-9]+]]: index
@@ -183,8 +183,8 @@
%target0 = stream.async.update %update#0, %target[%c0 to %c32] : !stream.resource<*>{%update_size} -> %target as !stream.resource<*>{%target_size}
// CHECK: %[[TARGET1:.+]] = stream.async.update %[[DISPATCH]]#2, %[[DISPATCH]]#0[%c32 to %c64]
%target1 = stream.async.update %update#2, %target0[%c32 to %c64] : !stream.resource<*>{%update_size} -> %target0 as !stream.resource<*>{%target_size}
- // CHECK: return %[[TARGET1]]
- return %target1 : !stream.resource<*>
+ // CHECK: util.return %[[TARGET1]]
+ util.return %target1 : !stream.resource<*>
}
// -----
@@ -192,7 +192,7 @@
// Tests that sequences with data dependencies don't hoist beyond them.
// CHECK-LABEL: @emplaceDependentDispatchSequence
-func.func @emplaceDependentDispatchSequence(
+util.func public @emplaceDependentDispatchSequence(
// CHECK-SAME: %[[INPUT:arg[0-9]+]]: !stream.resource<*>, %[[INPUT_SIZE:arg[0-9]+]]: index,
%input: !stream.resource<*>, %input_size: index,
// CHECK-SAME: %[[UPDATE_SIZE:arg[0-9]+]]: index, %[[TARGET_SIZE:arg[0-9]+]]: index
@@ -213,6 +213,6 @@
%target0 = stream.async.update %update0, %target[%c0 to %c49152] : !stream.resource<*>{%update_size} -> %target as !stream.resource<*>{%target_size}
// CHECK-NEXT: %[[TARGET1:.+]] = stream.async.update %[[UPDATE1]], %[[TARGET0]]
%target1 = stream.async.update %update1, %target0[%c49152 to %c98304] : !stream.resource<*>{%update_size} -> %target0 as !stream.resource<*>{%target_size}
- // CHECK-NEXT: return %[[TARGET1]]
- return %target1 : !stream.resource<*>
+ // CHECK-NEXT: util.return %[[TARGET1]]
+ util.return %target1 : !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
index a9e57f0..06c0700 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
@@ -4,7 +4,7 @@
stream.executable private @convert_load_i1 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:tensor<4xi8>>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4xi1>>
@@ -13,7 +13,7 @@
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi1>> -> tensor<?xi1>
// CHECK: util.optimization_barrier %[[TILE_I1]]
util.optimization_barrier %tile : tensor<?xi1>
- return
+ util.return
}
}
}
@@ -24,7 +24,7 @@
stream.executable private @convert_store_i1 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[TILE_I8:.+]] = arith.constant dense<[0, 0, 1, 1]> : tensor<4xi8>
// CHECK-DAG: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
@@ -32,7 +32,7 @@
%cst = arith.constant dense<[false, false, true, true]> : tensor<4xi1>
// CHECK-NEXT: flow.dispatch.tensor.store %[[TILE_I8]], %[[BINDING]], {{.+}} : tensor<4xi8> -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
flow.dispatch.tensor.store %cst, %binding, offsets = [0], sizes = [4], strides = [1] : tensor<4xi1> -> !flow.dispatch.tensor<writeonly:tensor<4xi1>>
- return
+ util.return
}
}
}
@@ -43,7 +43,7 @@
stream.executable private @convert_multi_i1 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding, %arg1: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding, %arg1: !stream.binding) {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
// CHECK-DAG: %[[BINDING0:.+]] = stream.binding.subspan %arg0{{.+}} -> !flow.dispatch.tensor<readonly:tensor<4xi8>>
@@ -61,7 +61,7 @@
// CHECK: %[[RESULT_I8:.+]] = arith.extui %[[RESULT_I1]] : tensor<?xi1> to tensor<?xi8>
// CHECK-NEXT: flow.dispatch.tensor.store %[[RESULT_I8]], %[[BINDING1]], {{.+}} : tensor<?xi8> -> !flow.dispatch.tensor<readwrite:tensor<4xi8>>
flow.dispatch.tensor.store %result, %binding1, offsets = [0], sizes = [%c4], strides = [1] : tensor<?xi1> -> !flow.dispatch.tensor<readwrite:tensor<4xi1>>
- return
+ util.return
}
}
}
@@ -72,7 +72,7 @@
stream.executable private @convert_load_i33 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:tensor<4xi64>>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4xi33>>
@@ -81,7 +81,7 @@
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi33>> -> tensor<?xi33>
// CHECK: util.optimization_barrier %[[TILE_I1]]
util.optimization_barrier %tile : tensor<?xi33>
- return
+ util.return
}
}
}
@@ -93,7 +93,7 @@
stream.executable private @convert_store_i33 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
// CHECK: %[[CST:.+]] = arith.constant dense<[0, 7, 2, 5]> : tensor<4xi64>
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<writeonly:tensor<4xi64>>
@@ -101,7 +101,7 @@
%cst = arith.constant dense<[0, 7, 2, 5]> : tensor<4xi33>
// CHECK: flow.dispatch.tensor.store %[[CST]], %[[BINDING]], {{.+}} : tensor<4xi64> -> !flow.dispatch.tensor<writeonly:tensor<4xi64>>
flow.dispatch.tensor.store %cst, %binding, offsets = [0], sizes = [4], strides = [1] : tensor<4xi33> -> !flow.dispatch.tensor<writeonly:tensor<4xi33>>
- return
+ util.return
}
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors_packing.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors_packing.mlir
index d5a2856..886abca 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors_packing.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors_packing.mlir
@@ -7,7 +7,7 @@
stream.executable private @subspanLoadI3 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:tensor<4xi8>>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4xi3>>
@@ -16,7 +16,7 @@
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi3>> -> tensor<?xi3>
// CHECK: util.optimization_barrier %[[TILE_I3]] : tensor<?xi3>
util.optimization_barrier %tile : tensor<?xi3>
- return
+ util.return
}
}
}
@@ -30,7 +30,7 @@
stream.executable private @subspanStoreI3 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
// CHECK: %[[CST:.+]] = arith.constant dense<[0, 7, 2, 5]> : tensor<4xi8>
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
@@ -38,7 +38,7 @@
%cst = arith.constant dense<[0, 7, 2, 5]> : tensor<4xi3>
// CHECK: flow.dispatch.tensor.store %[[CST]], %[[BINDING]], {{.+}} : tensor<4xi8> -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
flow.dispatch.tensor.store %cst, %binding, offsets = [0], sizes = [4], strides = [1] : tensor<4xi3> -> !flow.dispatch.tensor<writeonly:tensor<4xi3>>
- return
+ util.return
}
}
}
@@ -49,7 +49,7 @@
stream.executable private @subspanLoadI4 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:tensor<8xi4>>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<8xi4>>
@@ -57,7 +57,7 @@
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<8xi4>> -> tensor<?xi4>
// CHECK: util.optimization_barrier %[[TILE_I4]]
util.optimization_barrier %tile : tensor<?xi4>
- return
+ util.return
}
}
}
@@ -68,7 +68,7 @@
stream.executable private @subspanStoreI4 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[TILE_I4:.+]] = arith.constant dense<[5, -1, 0, 3, 1, 7, -8, 4]> : tensor<8xi4>
%cst = arith.constant dense<[5, 15, 0, 3, 1, 7, 8, 4]> : tensor<8xi4>
@@ -76,7 +76,7 @@
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<8xi4>>
// CHECK: flow.dispatch.tensor.store %[[TILE_I4]], %[[BINDING]], offsets = [0], sizes = [8], strides = [1] : tensor<8xi4> -> !flow.dispatch.tensor<writeonly:tensor<8xi4>>
flow.dispatch.tensor.store %cst, %binding, offsets = [0], sizes = [8], strides = [1] : tensor<8xi4> -> !flow.dispatch.tensor<writeonly:tensor<8xi4>>
- return
+ util.return
}
}
}
@@ -87,7 +87,7 @@
stream.executable private @subspanLoadI8 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<readonly:tensor<4xi8>>
%binding = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:tensor<4xi8>>
@@ -95,7 +95,7 @@
%tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi8>> -> tensor<?xi8>
// CHECK: util.optimization_barrier %[[TILE_I8]]
util.optimization_barrier %tile : tensor<?xi8>
- return
+ util.return
}
}
}
@@ -106,7 +106,7 @@
stream.executable private @subspanStoreI8 {
stream.executable.export public @dispatch
builtin.module {
- func.func @dispatch(%arg0: !stream.binding) {
+ util.func public @dispatch(%arg0: !stream.binding) {
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[TILE_I8:.+]] = arith.constant dense<[25, 8, 0, -1]> : tensor<4xi8>
// CHECK-DAG: %[[BINDING:.+]] = stream.binding.subspan {{.+}} -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
@@ -114,7 +114,7 @@
%cst = arith.constant dense<[25, 8, 0, 255]> : tensor<4xi8>
// CHECK-NEXT: flow.dispatch.tensor.store %[[TILE_I8]], %[[BINDING]], {{.+}} : tensor<4xi8> -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
flow.dispatch.tensor.store %cst, %binding, offsets = [0], sizes = [4], strides = [1] : tensor<4xi8> -> !flow.dispatch.tensor<writeonly:tensor<4xi8>>
- return
+ util.return
}
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
index b34db57..cb1aca7 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors.mlir
@@ -1,44 +1,44 @@
// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors %s | FileCheck %s
// CHECK-LABEL: @denseTensorSizeOf
-func.func @denseTensorSizeOf(%arg0: index) -> index {
+util.func public @denseTensorSizeOf(%arg0: index) -> index {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 20 : index
// CHECK: %[[DYNAMIC_SIZE:.+]] = arith.muli %arg0, %[[STATIC_SIZE]] : index
%0 = stream.tensor.sizeof tensor<?x5xf32>{%arg0} : index
- // CHECK: return %[[DYNAMIC_SIZE]]
- return %0 : index
+ // CHECK: util.return %[[DYNAMIC_SIZE]]
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @denseTensorSizeOfEmpty
-func.func @denseTensorSizeOfEmpty(%arg0: index) -> index {
+util.func public @denseTensorSizeOfEmpty(%arg0: index) -> index {
// CHECK: %[[ZERO:.+]] = arith.constant 0 : index
%0 = stream.tensor.sizeof tensor<?x0xf32>{%arg0} : index
- // CHECK: return %[[ZERO]]
- return %0 : index
+ // CHECK: util.return %[[ZERO]]
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @denseTensorEmpty
-func.func @denseTensorEmpty(%arg0: index, %arg1: index) -> !stream.resource<*> {
+util.func public @denseTensorEmpty(%arg0: index, %arg1: index) -> !stream.resource<*> {
// CHECK: %[[RET:.+]] = stream.async.alloca : !stream.resource<*>{%arg1}
%0 = stream.tensor.empty : tensor<?x1xf32>{%arg0} in !stream.resource<*>{%arg1}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorConstant
-func.func @denseTensorConstant(%arg0: index) -> !stream.resource<constant> {
+util.func public @denseTensorConstant(%arg0: index) -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 1280 : index
// CHECK: %[[DYNAMIC_SIZE:.+]] = arith.muli %arg0, %[[STATIC_SIZE]] : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[DYNAMIC_SIZE]]} = dense<0.000000e+00> : tensor<1x5x64xf32>
%0 = stream.tensor.constant : tensor<?x5x64xf32>{%arg0} in !stream.resource<constant> = dense<0.000000e+00> : tensor<1x5x64xf32>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
@@ -46,91 +46,91 @@
// Tests that sub-byte element width constants get extended to byte alignment.
// CHECK-LABEL: @denseTensorConstantI1
-func.func @denseTensorConstantI1() -> !stream.resource<constant> {
+util.func public @denseTensorConstantI1() -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 4 : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[STATIC_SIZE]]} = dense<[1, 1, 0, 1]> : tensor<4xi8>
%0 = stream.tensor.constant : tensor<4xi1> in !stream.resource<constant> = dense<[true, true, false, true]> : tensor<4xi1>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @denseTensorSplatI32
-func.func @denseTensorSplatI32(%arg0: i32, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatI32(%arg0: i32, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[RET:.+]] = stream.async.splat %arg0 : i32 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : i32 -> tensor<?x1x10xi32>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatI1
-func.func @denseTensorSplatI1(%arg0: i1, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatI1(%arg0: i1, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[PATTERN:.+]] = arith.extui %arg0 : i1 to i8
// CHECK: %[[RET:.+]] = stream.async.splat %[[PATTERN]] : i8 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : i1 -> tensor<?x1x10xi1>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatBF16
-func.func @denseTensorSplatBF16(%arg0: bf16, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatBF16(%arg0: bf16, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[PATTERN:.+]] = arith.bitcast %arg0 : bf16 to i16
// CHECK: %[[RET:.+]] = stream.async.splat %[[PATTERN]] : i16 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : bf16 -> tensor<?x1x10xbf16>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatF32
-func.func @denseTensorSplatF32(%arg0: f32, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatF32(%arg0: f32, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[PATTERN:.+]] = arith.bitcast %arg0 : f32 to i32
// CHECK: %[[RET:.+]] = stream.async.splat %[[PATTERN]] : i32 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : f32 -> tensor<?x1x10xf32>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatI64
-func.func @denseTensorSplatI64(%arg0: i64, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatI64(%arg0: i64, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[RET:.+]] = stream.async.splat %arg0 : i64 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : i64 -> tensor<?x1x10xi64>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatConstantComplexF32
-func.func @denseTensorSplatConstantComplexF32(%arg0: !stream.resource<*>) -> (!stream.resource<*>) {
+util.func public @denseTensorSplatConstantComplexF32(%arg0: !stream.resource<*>) -> (!stream.resource<*>) {
%cst = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex<f32>
%0 = stream.tensor.sizeof tensor<6xcomplex<f32>> : index
// CHECK: %[[I64NUMBER:.+]] = complex.constant [3.000000e+00 : f32, 1.000000e+01 : f32] : complex<f32>
// CHECK: %[[BITCAST:.+]] = complex.bitcast %[[I64NUMBER]] : complex<f32> to i64
// CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]]
%1 = stream.tensor.splat %cst : complex<f32> -> tensor<6xcomplex<f32>> in !stream.resource<*>{%0}
- // CHECK: return %[[SPLAT_RES]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[SPLAT_RES]]
+ util.return %1 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSplatDynamicComplexF32
-func.func @denseTensorSplatDynamicComplexF32(%arg0: !stream.resource<*>, %arg1: complex<f32>) -> (!stream.resource<*>) {
+util.func public @denseTensorSplatDynamicComplexF32(%arg0: !stream.resource<*>, %arg1: complex<f32>) -> (!stream.resource<*>) {
%0 = stream.tensor.sizeof tensor<6xcomplex<f32>> : index
// CHECK: %[[BITCAST:.+]] = complex.bitcast %arg1 : complex<f32> to i64
// CHECK: %[[SPLAT_RES:.+]] = stream.async.splat %[[BITCAST]]
%1 = stream.tensor.splat %arg1 : complex<f32> -> tensor<6xcomplex<f32>> in !stream.resource<*>{%0}
- // CHECK: return %[[SPLAT_RES]]
- return %1 : !stream.resource<*>
+ // CHECK: util.return %[[SPLAT_RES]]
+ util.return %1 : !stream.resource<*>
}
// -----
@@ -138,33 +138,33 @@
// NOTE: clone likes to fold; the fills ensure it doesn't.
// CHECK-LABEL: @denseTensorClone
-func.func @denseTensorClone(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: f32) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @denseTensorClone(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: f32) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: %[[RET:.+]] = stream.async.clone %arg0 : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg2}
%0 = stream.tensor.clone %arg0 : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2}
%1 = stream.tensor.fill %arg3, %0[%c0, %c0 for %c1, %c1] : f32 -> tensor<?x4xf32>{%arg1} in %0 as !stream.resource<*>{%arg2}
- return %0, %1 : !stream.resource<*>, !stream.resource<*>
+ util.return %0, %1 : !stream.resource<*>, !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSlice
-func.func @denseTensorSlice(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) -> !stream.resource<*> {
+util.func public @denseTensorSlice(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 4 : index
// CHECK: %[[END:.+]] = arith.addi %arg4, %[[OFFSET]] : index
// CHECK: %[[RET:.+]] = stream.async.slice %arg0[%[[OFFSET]] to %[[END]]] : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg4}
%0 = stream.tensor.slice %arg0[%c0, %c1 for %arg3, %c1] : tensor<?x4xf32>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x1xf32>{%arg3} in !stream.resource<*>{%arg4}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorFillF32
-func.func @denseTensorFillF32(%arg0: f32, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
+util.func public @denseTensorFillF32(%arg0: f32, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0 : index
@@ -172,28 +172,28 @@
// CHECK-DAG: %[[PATTERN:.+]] = arith.bitcast %arg0 : f32 to i32
// CHECK: %[[RET:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i32 -> %arg1 as !stream.resource<*>{%arg3}
%0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f32 -> tensor<?x4xf32>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorFillI64
-func.func @denseTensorFillI64(%arg0: i64, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
+util.func public @denseTensorFillI64(%arg0: i64, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 40 : index
// CHECK: %[[RET:.+]] = stream.async.fill %arg0, %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3}
%0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : i64 -> tensor<?x4xi64>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorFillF64
-func.func @denseTensorFillF64(%arg0: f64, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
+util.func public @denseTensorFillF64(%arg0: f64, %arg1: !stream.resource<*>, %arg2: index, %arg3: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0 : index
@@ -201,67 +201,67 @@
// CHECK-DAG: %[[PATTERN:.+]] = arith.bitcast %arg0 : f64 to i64
// CHECK: %[[RET:.+]] = stream.async.fill %[[PATTERN]], %arg1[%[[OFFSET]] to %[[LENGTH]] for %[[LENGTH]]] : i64 -> %arg1 as !stream.resource<*>{%arg3}
%0 = stream.tensor.fill %arg0, %arg1[%c0, %c0 for %c1, %c1] : f64 -> tensor<?x4xi64>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorUpdate
-func.func @denseTensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) -> !stream.resource<*> {
+util.func public @denseTensorUpdate(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK: %[[RET:.+]] = stream.async.update %arg0, %arg2[%[[OFFSET]] to %arg1] : !stream.resource<*>{%arg1} -> %arg2 as !stream.resource<*>{%arg4}
%0 = stream.tensor.update %arg0, %arg2[%c0, %c0] : tensor<2x2xf32> in !stream.resource<*>{%arg1} -> tensor<?x4xf32>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorLoad
-func.func @denseTensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index) -> f32 {
+util.func public @denseTensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK: %[[RET:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource<staging>{%arg2} -> f32
%0 = stream.tensor.load %arg0[%c0] : tensor<?xf32>{%arg1} in !stream.resource<staging>{%arg2} -> f32
- // CHECK: return %[[RET]]
- return %0 : f32
+ // CHECK: util.return %[[RET]]
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @denseTensorLoadRank0
-func.func @denseTensorLoadRank0(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
+util.func public @denseTensorLoadRank0(%arg0: !stream.resource<staging>, %arg1: index) -> f32 {
%c0 = arith.constant 0 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK: %[[RET:.+]] = stream.async.load %arg0[%[[OFFSET]]] : !stream.resource<staging>{%arg1} -> f32
%0 = stream.tensor.load %arg0 : tensor<f32> in !stream.resource<staging>{%arg1} -> f32
- // CHECK: return %[[RET]]
- return %0 : f32
+ // CHECK: util.return %[[RET]]
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @denseTensorStore
-func.func @denseTensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: f32) -> !stream.resource<staging> {
+util.func public @denseTensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK: %[[RET:.+]] = stream.async.store %arg3, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource<staging>{%arg2}
%0 = stream.tensor.store %arg3, %arg0[%c0] : f32 -> tensor<?xf32>{%arg1} in %arg0 as !stream.resource<staging>{%arg2}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<staging>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @denseTensorStoreRank0
-func.func @denseTensorStoreRank0(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
+util.func public @denseTensorStoreRank0(%arg0: !stream.resource<staging>, %arg1: index, %arg2: f32) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: %[[OFFSET:.+]] = arith.constant 0 : index
// CHECK: %[[RET:.+]] = stream.async.store %arg2, %arg0[%[[OFFSET]]] : f32 -> %arg0 as !stream.resource<staging>{%arg1}
%0 = stream.tensor.store %arg2, %arg0 : f32 -> tensor<f32> in %arg0 as !stream.resource<staging>{%arg1}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<staging>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<staging>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing.mlir
index 824d0a7..bc58632 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_host_tensors_packing.mlir
@@ -1,15 +1,15 @@
// RUN: iree-opt --split-input-file --iree-stream-encode-host-tensors --verify-diagnostics %s | FileCheck %s
-// CHECK-LABEL: func.func @denseTensorConstantI2()
-func.func @denseTensorConstantI2() -> !stream.resource<constant> {
+// CHECK-LABEL: util.func public @denseTensorConstantI2()
+util.func public @denseTensorConstantI2() -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 4 : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[STATIC_SIZE]]} =
// CHECK-SAME: dense<[0, 1, -2, -1, 0, 1, -2, -1, 0, 1, -2, -1, 0, 1, -2, -1]> : tensor<16xi2>
%0 = stream.tensor.constant : tensor<16xi2> in !stream.resource<constant> = dense<[
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
]> : tensor<16xi2>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
@@ -17,78 +17,78 @@
// Ensures that a non-power-of-two type (i3) constant is expanded to a full byte
// because we don't currently do unaligned sub-byte packing.
-// CHECK: func.func @denseTensorConstantI3()
-func.func @denseTensorConstantI3() -> !stream.resource<constant> {
+// CHECK: util.func public @denseTensorConstantI3()
+util.func public @denseTensorConstantI3() -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 4 : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[STATIC_SIZE]]} = dense<[0, 7, 2, 5]> : tensor<4xi8>
%0 = stream.tensor.constant : tensor<4xi3> in !stream.resource<constant> = dense<[0, 7, 2, 5]> : tensor<4xi3>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @denseTensorConstantI4
-func.func @denseTensorConstantI4() -> !stream.resource<constant> {
+util.func public @denseTensorConstantI4() -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 4 : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[STATIC_SIZE]]} = dense<[5, -1, 0, 3, 1, 7, -8, 4]> : tensor<8xi4>
%0 = stream.tensor.constant : tensor<8xi4> in !stream.resource<constant> = dense<[5, 15, 0, 3, 1, 7, 8, 4]> : tensor<8xi4>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
// Checks that non-byte-aligned total size is not supported for constant.
-func.func @denseTensorConstantI4() -> !stream.resource<constant> {
+util.func public @denseTensorConstantI4() -> !stream.resource<constant> {
// expected-error @+1 {{failed to calculate total byte count: 'tensor<5xi4>' does not have integral number of total bytes}}
%0 = stream.tensor.constant : tensor<5xi4> in !stream.resource<constant> = dense<[5, 15, 0, 3, 1]> : tensor<5xi4>
- return %0 : !stream.resource<constant>
+ util.return %0 : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @denseTensorConstantI8
-func.func @denseTensorConstantI8() -> !stream.resource<constant> {
+util.func public @denseTensorConstantI8() -> !stream.resource<constant> {
// CHECK: %[[STATIC_SIZE:.+]] = arith.constant 8 : index
// CHECK: %[[RET:.+]] = stream.async.constant : !stream.resource<constant>{%[[STATIC_SIZE]]} = dense<[5, 15, 0, 3, 1, 7, 8, 4]> : tensor<8xi8>
%0 = stream.tensor.constant : tensor<8xi8> in !stream.resource<constant> = dense<[5, 15, 0, 3, 1, 7, 8, 4]> : tensor<8xi8>
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<constant>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<constant>
}
// -----
// CHECK-LABEL: @denseTensorSizeOfStatic
-func.func @denseTensorSizeOfStatic() -> index {
+util.func public @denseTensorSizeOfStatic() -> index {
// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
%0 = stream.tensor.sizeof tensor<12xi4> : index
- // CHECK: return %[[C6]]
- return %0 : index
+ // CHECK: util.return %[[C6]]
+ util.return %0 : index
}
// -----
// Checks that non-byte-aligned total size is not supported for sizeof.
-func.func @denseTensorSizeOfStatic() -> index {
+util.func public @denseTensorSizeOfStatic() -> index {
// expected-error @+1 {{failed to calculate total byte count: 'tensor<11xi4>' does not have integral number of total bytes}}
%0 = stream.tensor.sizeof tensor<11xi4> : index
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @denseTensorSizeOfDynamic
-func.func @denseTensorSizeOfDynamic(%arg0: index) -> index {
+util.func public @denseTensorSizeOfDynamic(%arg0: index) -> index {
// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK: %[[MUL:.+]] = arith.muli %arg0, %[[C5]] : index
// CHECK: %[[DIV:.+]] = arith.divui %[[MUL]], %[[C2]] : index
%0 = stream.tensor.sizeof tensor<?x5xi4>{%arg0} : index
- // CHECK: return %[[DIV]]
- return %0 : index
+ // CHECK: util.return %[[DIV]]
+ util.return %0 : index
}
// -----
@@ -96,11 +96,11 @@
// Checks that stream.tensor.load with sub-byte packing is not supported right now.
// CHECK-LABEL: @denseTensorLoad
-func.func @denseTensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: index) -> i4 {
+util.func public @denseTensorLoad(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: index) -> i4 {
%c0 = arith.constant 0 : index
// CHECK: stream.tensor.load
%0 = stream.tensor.load %arg0[%arg3] : tensor<?xi4>{%arg1} in !stream.resource<staging>{%arg2} -> i4
- return %0 : i4
+ util.return %0 : i4
}
// -----
@@ -108,17 +108,17 @@
// Checks that stream.tensor.store with sub-byte packing is not supported right now.
// CHECK-LABEL: @denseTensorStore
-func.func @denseTensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: i4) -> !stream.resource<staging> {
+util.func public @denseTensorStore(%arg0: !stream.resource<staging>, %arg1: index, %arg2: index, %arg3: i4) -> !stream.resource<staging> {
%c0 = arith.constant 0 : index
// CHECK: stream.tensor.store
%0 = stream.tensor.store %arg3, %arg0[%c0] : i4 -> tensor<?xi4>{%arg1} in %arg0 as !stream.resource<staging>{%arg2}
- return %0 : !stream.resource<staging>
+ util.return %0 : !stream.resource<staging>
}
// -----
// CHECK-LABEL: @denseTensorSplatI2
-func.func @denseTensorSplatI2(%arg0: i2, %arg1: index, %arg2: index) -> !stream.resource<*> {
+util.func public @denseTensorSplatI2(%arg0: i2, %arg1: index, %arg2: index) -> !stream.resource<*> {
// CHECK: %[[C2:.+]] = arith.constant 2 : i8
// CHECK: %[[PART:.+]] = arith.extui %arg0 : i2 to i8
// CHECK: %[[SHL0:.+]] = arith.shli %[[PART]], %[[C2]] : i8
@@ -129,14 +129,14 @@
// CHECK: %[[FULL:.+]] = arith.ori %[[SH2]], %[[PART]] : i8
// CHECK: %[[SPLAT:.+]] = stream.async.splat %[[FULL]] : i8 -> !stream.resource<*>{%arg2}
%0 = stream.tensor.splat %arg0 : i2 -> tensor<?x1x16xi2>{%arg1} in !stream.resource<*>{%arg2}
- // CHECK: return %[[SPLAT]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[SPLAT]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorFillI4
-func.func @denseTensorFillI4(%arg0: i4, %arg1: !stream.resource<*>, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) -> !stream.resource<*> {
+util.func public @denseTensorFillI4(%arg0: i4, %arg1: !stream.resource<*>, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) -> !stream.resource<*> {
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : i8
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
@@ -152,14 +152,14 @@
// CHECK: %[[END:.+]] = arith.addi %[[OFFSET]], %[[LEN]] : index
// CHECK: %[[FILL:.+]] = stream.async.fill %[[FULL]], %arg1[%[[OFFSET]] to %[[END]] for %[[LEN]]] : i8 -> %arg1 as !stream.resource<*>{%arg3}
%0 = stream.tensor.fill %arg0, %arg1[%arg4, %arg5 for %arg6, %arg7] : i4 -> tensor<?x16xi4>{%arg2} in %arg1 as !stream.resource<*>{%arg3}
- // CHECK: return %[[FILL]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[FILL]]
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorSliceI2
-func.func @denseTensorSliceI2(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
+util.func public @denseTensorSliceI2(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
%c2 = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
@@ -169,8 +169,8 @@
// CHECK: %[[LEN:.+]] = arith.addi %[[OFFSET]], %arg4 : index
// CHECK: %[[SLICE:.+]] = stream.async.slice %arg0[%[[OFFSET]] to %[[LEN]]] : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg4}
%0 = stream.tensor.slice %arg0[%arg5, %arg6 for %arg3, %c2] : tensor<?x8xi2>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x2xi2>{%arg3} in !stream.resource<*>{%arg4}
- // CHECK: return %[[SLICE]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[SLICE]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -179,7 +179,7 @@
// because we don't currently do unaligned sub-byte packing.
// CHECK-LABEL: @denseTensorSliceI3
-func.func @denseTensorSliceI3(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
+util.func public @denseTensorSliceI3(%arg0: !stream.resource<*>, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
%c2 = arith.constant 2 : index
// CHECK: %[[C8:.+]] = arith.constant 8 : index
// CHECK: %[[MUL:.+]] = arith.muli %arg5, %[[C8]] : index
@@ -187,8 +187,8 @@
// CHECK: %[[LEN:.+]] = arith.addi %[[OFFSET]], %arg4 : index
// CHECK: %[[SLICE:.+]] = stream.async.slice %arg0[%[[OFFSET]] to %[[LEN]]] : !stream.resource<*>{%arg2} -> !stream.resource<*>{%arg4}
%0 = stream.tensor.slice %arg0[%arg5, %arg6 for %arg3, %c2] : tensor<?x8xi3>{%arg1} in !stream.resource<*>{%arg2} -> tensor<?x2xi3>{%arg3} in !stream.resource<*>{%arg4}
- // CHECK: return %[[SLICE]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[SLICE]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -197,7 +197,7 @@
// because we don't currently do unaligned sub-byte packing.
// CHECK-LABEL: @denseTensorUpdateI3
-func.func @denseTensorUpdateI3(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
+util.func public @denseTensorUpdateI3(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
// CHECK: %[[C4:.+]] = arith.constant 4 : index
// CHECK: %[[MUL:.+]] = arith.muli %arg5, %[[C4]] : index
// CHECK: %[[OFFSET:.+]] = arith.addi %[[MUL]], %arg6 : index
@@ -205,14 +205,14 @@
// CHECK: %[[UPDATE:.+]] = stream.async.update %arg0, %arg2[%[[OFFSET]] to %[[LEN]]] : !stream.resource<*>{%arg1} -> %arg2 as !stream.resource<*>{%arg4}
%0 = stream.tensor.update %arg0, %arg2[%arg5, %arg6] : tensor<8x4xi3> in !stream.resource<*>{%arg1} -> tensor<?x4xi3>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
- // CHECK: return %[[UPDATE]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[UPDATE]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// CHECK-LABEL: @denseTensorUpdateI4
-func.func @denseTensorUpdateI4(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
+util.func public @denseTensorUpdateI4(%arg0: !stream.resource<*>, %arg1: index, %arg2: !stream.resource<*>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) -> !stream.resource<*> {
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK: %[[MUL:.+]] = arith.muli %arg5, %[[C4]] : index
@@ -221,6 +221,6 @@
// CHECK: %[[LEN:.+]] = arith.addi %[[OFFSET]], %arg1 : index
// CHECK: %[[UPDATE:.+]] = stream.async.update %arg0, %arg2[%[[OFFSET]] to %[[LEN]]] : !stream.resource<*>{%arg1} -> %arg2 as !stream.resource<*>{%arg4}
%0 = stream.tensor.update %arg0, %arg2[%arg5, %arg6] : tensor<8x4xi4> in !stream.resource<*>{%arg1} -> tensor<?x4xi4>{%arg3} in %arg2 as !stream.resource<*>{%arg4}
- // CHECK: return %[[UPDATE]] : !stream.resource<*>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[UPDATE]] : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_globals.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_globals.mlir
index f817857..4c9ce60 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_globals.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_globals.mlir
@@ -6,27 +6,27 @@
// CHECK: util.global public mutable @uniformConstants = #stream.timepoint<immediate>
util.global public mutable @uniformConstants : !stream.timepoint
-func.func @foo() {
+util.func public @foo() {
%timepoint = stream.timepoint.immediate => !stream.timepoint
// CHECK-NOT: util.global.store
util.global.store %timepoint, @uniformConstants : !stream.timepoint
- return
+ util.return
}
-func.func @bar() {
+util.func public @bar() {
%timepoint = stream.timepoint.immediate => !stream.timepoint
// CHECK-NOT: util.global.store
util.global.store %timepoint, @uniformConstants : !stream.timepoint
- return
+ util.return
}
// -----
// CHECK-NOT: @immutable
util.global private @immutable = #stream.timepoint<immediate> : !stream.timepoint
-func.func @foo() -> !stream.timepoint {
+util.func public @foo() -> !stream.timepoint {
// CHECK-NOT: util.global.load @immutable
// CHECK: %[[IMMEDIATE:.+]] = stream.timepoint.immediate => !stream.timepoint
%0 = util.global.load @immutable : !stream.timepoint
- // CHECK: return %[[IMMEDIATE]]
- return %0 : !stream.timepoint
+ // CHECK: util.return %[[IMMEDIATE]]
+ util.return %0 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
index 985f28e..f37b7a9 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
@@ -11,8 +11,8 @@
stream.executable private @deduplicateOperandsEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A01:.+]]: i32, %[[B0:.+]]: index, %[[C:.+]]: i1, %[[B1:.+]]: index)
- func.func @dispatch(%binding: !stream.binding, %a0: i32, %b0: index, %c: i1, %a1: i32, %b1: index) {
+ // CHECK: util.func public @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A01:.+]]: i32, %[[B0:.+]]: index, %[[C:.+]]: i1, %[[B1:.+]]: index)
+ util.func public @dispatch(%binding: !stream.binding, %a0: i32, %b0: index, %c: i1, %a1: i32, %b1: index) {
// CHECK-NEXT: util.optimization_barrier %[[BINDING]] : !stream.binding
util.optimization_barrier %binding : !stream.binding
// CHECK-NEXT: util.optimization_barrier %[[A01]] : i32
@@ -25,12 +25,12 @@
util.optimization_barrier %b1 : index
// CHECK-NEXT: util.optimization_barrier %[[C]] : i1
util.optimization_barrier %c : i1
- return
+ util.return
}
}
}
-// CHECK: func.func @deduplicateOperands(%[[A:.+]]: i32, %[[B:.+]]: index, %[[C:.+]]: i1)
-func.func @deduplicateOperands(%a: i32, %b: index, %c: i1) {
+// CHECK: util.func public @deduplicateOperands(%[[A:.+]]: i32, %[[B:.+]]: index, %[[C:.+]]: i1)
+util.func public @deduplicateOperands(%a: i32, %b: index, %c: i1) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -45,7 +45,7 @@
rw %capture[%c0 for %c20] : !stream.resource<transient>{%c20}
}
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -60,8 +60,8 @@
stream.executable private @inlineConstantOperandsEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A:.+]]: i32, %[[C:.+]]: i1)
- func.func @dispatch(%binding: !stream.binding, %a: i32, %b: index, %c: i1) {
+ // CHECK: util.func public @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A:.+]]: i32, %[[C:.+]]: i1)
+ util.func public @dispatch(%binding: !stream.binding, %a: i32, %b: index, %c: i1) {
// CHECK: %[[B:.+]] = arith.constant 20 : index
// CHECK-NEXT: util.optimization_barrier %[[BINDING]] : !stream.binding
util.optimization_barrier %binding : !stream.binding
@@ -71,12 +71,12 @@
util.optimization_barrier %b : index
// CHECK-NEXT: util.optimization_barrier %[[C]] : i1
util.optimization_barrier %c : i1
- return
+ util.return
}
}
}
-// CHECK: func.func @inlineConstantOperands(%[[A:.+]]: i32)
-func.func @inlineConstantOperands(%a: i32) {
+// CHECK: util.func public @inlineConstantOperands(%[[A:.+]]: i32)
+util.func public @inlineConstantOperands(%a: i32) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -93,5 +93,5 @@
rw %capture[%c0 for %c20] : !stream.resource<transient>{%c20}
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
index 6f0f7a6..14e8fb2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
@@ -16,9 +16,9 @@
stream.executable private @rebaseBindingsEx {
stream.executable.export public @dispatch attributes {stream.resources = #aliasConfig}
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_B:.+]]: !stream.binding,
+ // CHECK: util.func public @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_B:.+]]: !stream.binding,
// CHECK-SAME: %[[OFFSET_A:.+]]: index, %[[OFFSET_B:.+]]: index, %[[OPERAND:.+]]: index)
- func.func @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %operand: index) {
+ util.func public @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %operand: index) {
%c0 = arith.constant 0 : index
%c20 = arith.constant 20 : index
@@ -35,12 +35,12 @@
// CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
util.optimization_barrier %operand : index
- return
+ util.return
}
}
}
-// CHECK: func.func @rebaseBindings(%[[OPERAND:.+]]: index)
-func.func @rebaseBindings(%operand: index) {
+// CHECK: util.func public @rebaseBindings(%[[OPERAND:.+]]: index)
+util.func public @rebaseBindings(%operand: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -74,7 +74,7 @@
ro %capture1[%c160 for %c20] : !stream.resource<transient>{%c200}
}
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -97,9 +97,9 @@
stream.executable private @deduplicateBindingsEx {
stream.executable.export public @dispatch attributes {stream.resources = #aliasConfig}
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_B:.+]]: !stream.binding,
+ // CHECK: util.func public @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_B:.+]]: !stream.binding,
// CHECK-SAME: %[[OFFSET_A:.+]]: index, %[[OFFSET_C:.+]]: index, %[[OFFSET_B:.+]]: index, %[[OPERAND:.+]]: index)
- func.func @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %binding_c: !stream.binding, %operand: index) {
+ util.func public @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %binding_c: !stream.binding, %operand: index) {
%c0 = arith.constant 0 : index
%c20 = arith.constant 20 : index
%c40 = arith.constant 40 : index
@@ -123,12 +123,12 @@
// CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
util.optimization_barrier %operand : index
- return
+ util.return
}
}
}
-// CHECK: func.func @deduplicateBindings(%[[OPERAND:.+]]: index)
-func.func @deduplicateBindings(%operand: index) {
+// CHECK: util.func public @deduplicateBindings(%[[OPERAND:.+]]: index)
+util.func public @deduplicateBindings(%operand: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -166,5 +166,5 @@
rw %capture0[%c20 for %c20] : !stream.resource<transient>{%c200}
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
index 1736232..6b9696b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
@@ -13,9 +13,9 @@
stream.executable private @deduplicateBindingsEx {
stream.executable.export public @dispatch attributes {stream.resources = #noaliasConfig}
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_C:.+]]: !stream.binding,
+ // CHECK: util.func public @dispatch(%[[BINDING_A:.+]]: !stream.binding, %[[BINDING_C:.+]]: !stream.binding,
// CHECK-SAME: %[[OFFSET_A:.+]]: index, %[[OFFSET_B:.+]]: index, %[[OFFSET_C:.+]]: index, %[[OPERAND:.+]]: index)
- func.func @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %binding_c: !stream.binding, %operand: index) {
+ util.func public @dispatch(%binding_a: !stream.binding, %binding_b: !stream.binding, %binding_c: !stream.binding, %operand: index) {
%c0 = arith.constant 0 : index
%c20 = arith.constant 20 : index
%c40 = arith.constant 40 : index
@@ -39,12 +39,12 @@
// CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
util.optimization_barrier %operand : index
- return
+ util.return
}
}
}
-// CHECK: func.func @deduplicateBindings(%[[OPERAND:.+]]: index)
-func.func @deduplicateBindings(%operand: index) {
+// CHECK: util.func public @deduplicateBindings(%[[OPERAND:.+]]: index)
+util.func public @deduplicateBindings(%operand: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -78,5 +78,5 @@
rw %capture0[%c20 for %c20] : !stream.resource<transient>{%c200}
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/layout_slices.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/layout_slices.mlir
index 30cf36d..1bc6fc2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/layout_slices.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/layout_slices.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-stream-layout-slices, cse))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module( util.func(iree-stream-layout-slices, cse))' %s | FileCheck %s
#layoutStaticConfig = #stream.resource_config<{
max_allocation_size = 1073741824,
@@ -9,7 +9,7 @@
}>
// CHECK-LABEL: @layoutStatic
-func.func @layoutStatic() -> (index, index, index, index, index, index, index)
+util.func public @layoutStatic() -> (index, index, index, index, index, index, index)
attributes {stream.resources = #layoutStaticConfig} {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -22,9 +22,9 @@
[5, 8] = %c100, // +208 (after 200 align 16)
}) : index
// 224 + 200 align 16 = 432 total bytes required
- // CHECK: return %c432
+ // CHECK: util.return %c432
// CHECK-SAME: %c0, %c112, %c0, %c224, %c0, %c208
- return %t#0, %t#1, %t#2, %t#3, %t#4, %t#5, %t#6 : index, index, index, index, index, index, index
+ util.return %t#0, %t#1, %t#2, %t#3, %t#4, %t#5, %t#6 : index, index, index, index, index, index, index
}
// -----
@@ -39,7 +39,7 @@
// CHECK-LABEL: @layoutDynamic
// CHECK-SAME: (%[[SIZE_A:.+]]: index, %[[SIZE_B:.+]]: index)
-func.func @layoutDynamic(%size_a: index, %size_b: index) -> (index, index, index, index)
+util.func public @layoutDynamic(%size_a: index, %size_b: index) -> (index, index, index, index)
attributes {stream.resources = #layoutDynamicConfig} {
%t:4 = stream.resource.pack slices({
[0, 1] = %size_a,
@@ -54,8 +54,8 @@
// CHECK-DAG: %2 = util.align %[[SIZE_B]], %c16 : index
// CHECK-DAG: %3 = arith.addi %1, %2 : index
- // CHECK: return %3, %c0, %1, %c0
- return %t#0, %t#1, %t#2, %t#3 : index, index, index, index
+ // CHECK: util.return %3, %c0, %1, %c0
+ util.return %t#0, %t#1, %t#2, %t#3 : index, index, index, index
}
// -----
@@ -70,7 +70,7 @@
// CHECK-LABEL: @layoutMixedStaticDynamic
// CHECK-SAME: (%[[SIZE_A:.+]]: index, %[[SIZE_B:.+]]: index)
-func.func @layoutMixedStaticDynamic(%size_a: index, %size_b: index) -> (index, index, index, index, index)
+util.func public @layoutMixedStaticDynamic(%size_a: index, %size_b: index) -> (index, index, index, index, index)
attributes {stream.resources = #layoutMixedStaticDynamicConfig} {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -89,6 +89,6 @@
// CHECK-DAG: %2 = util.align %[[SIZE_B]], %c16 : index
// CHECK-DAG: %3 = arith.addi %1, %2 : index
- // CHECK: return %3, %c0, %c208, %1, %c0
- return %t#0, %t#1, %t#2, %t#3, %t#4 : index, index, index, index, index
+ // CHECK: util.return %3, %c0, %c208, %1, %c0
+ util.return %t#0, %t#1, %t#2, %t#3, %t#4 : index, index, index, index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir
index 92a0627..bc50f4e 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_builtins.mlir
@@ -5,13 +5,13 @@
// is set.
// CHECK-LABEL: @splatI32
-func.func @splatI32(%arg0: index, %arg1: i32) -> !stream.resource<*> {
+util.func public @splatI32(%arg0: index, %arg1: i32) -> !stream.resource<*> {
// NATIVE: %[[RET:.+]] = stream.async.splat %arg1
// EMULATED: %[[COUNT:.+]] = arith.divui %arg0, %c4
// EMULATED: %[[RET:.+]] = stream.async.dispatch @__builtin_splat_i32::@__builtin_splat_i32[%[[COUNT]]](%arg1, %[[COUNT]]) : (i32, index) -> !stream.resource<*>{%arg0}
%0 = stream.async.splat %arg1 : i32 -> !stream.resource<*>{%arg0}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// NATIVE-NOT: stream.executable private @__builtin_splat_i32
@@ -22,12 +22,12 @@
// Tests expansion of the stream.async.splat op for i64 types.
// CHECK-LABEL: @builtinSplatI64
-func.func @builtinSplatI64(%arg0: index, %arg1: i64) -> !stream.resource<*> {
+util.func public @builtinSplatI64(%arg0: index, %arg1: i64) -> !stream.resource<*> {
// CHECK: %[[COUNT:.+]] = arith.divui %arg0, %c8
// CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_splat_i64::@__builtin_splat_i64[%[[COUNT]]](%arg1, %[[COUNT]]) : (i64, index) -> !stream.resource<*>{%arg0}
%0 = stream.async.splat %arg1 : i64 -> !stream.resource<*>{%arg0}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// CHECK: stream.executable private @__builtin_splat_i64
@@ -38,12 +38,12 @@
// CHECK-LABEL: @builtinFillI64
// CHECK-SAME: (%[[RES:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[VALUE:.+]]: i64, %[[BYTE_OFFSET:.+]]: index, %[[BYTE_END:.+]]: index, %[[BYTE_LENGTH:.+]]: index)
-func.func @builtinFillI64(%res: !stream.resource<*>, %size: index, %value: i64, %byte_offset: index, %byte_end: index, %byte_length: index) -> !stream.resource<*> {
+util.func public @builtinFillI64(%res: !stream.resource<*>, %size: index, %value: i64, %byte_offset: index, %byte_end: index, %byte_length: index) -> !stream.resource<*> {
// CHECK: %[[COUNT:.+]] = arith.divui %[[BYTE_LENGTH]], %c8
// CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_fill_i64::@__builtin_fill_i64[%[[COUNT]]](%[[RES]][%[[BYTE_OFFSET]] to %[[BYTE_END]] for %[[BYTE_LENGTH]]], %[[VALUE]], %[[BYTE_OFFSET]], %[[COUNT]]) : (!stream.resource<*>{%[[SIZE]]}, i64, index, index) -> %[[RES]]{%[[SIZE]]}
%0 = stream.async.fill %value, %res[%byte_offset to %byte_end for %byte_length] : i64 -> %arg0 as !stream.resource<*>{%size}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// CHECK: stream.executable private @__builtin_fill_i64
@@ -54,7 +54,7 @@
// correct places.
// CHECK-LABEL: @builtinSplatI64
-func.func @builtinSplatI64(%arg0: index, %arg1: i64) -> (!stream.resource<*>, !stream.timepoint) {
+util.func public @builtinSplatI64(%arg0: index, %arg1: i64) -> (!stream.resource<*>, !stream.timepoint) {
// CHECK: %[[COUNT:.+]] = arith.divui %arg0, %c8
// CHECK: = stream.async.execute
%0:2 = stream.async.execute with() -> !stream.resource<*>{%arg0} {
@@ -67,7 +67,7 @@
}
stream.yield %1 : !stream.resource<*>{%arg0}
} => !stream.timepoint
- return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<*>, !stream.timepoint
}
// CHECK: stream.executable private @__builtin_splat_i64
@@ -87,14 +87,14 @@
// CHECK: stream.executable private @__builtin_splat_i64
-// CHECK: func.func @otherUser
-func.func @otherUser() -> !stream.resource<*> {
+// CHECK: util.func public @otherUser
+util.func public @otherUser() -> !stream.resource<*> {
%c128 = arith.constant 128 : index
%c1_i64 = arith.constant 1 : i64
// CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_splat_i64::@__builtin_splat_i64
%0 = stream.async.splat %c1_i64 : i64 -> !stream.resource<*>{%c128}
- // CHECK: return %[[RET]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]]
+ util.return %0 : !stream.resource<*>
}
// CHECK-NOT: stream.executable private @__builtin_splat_i64
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_copy_on_write.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_copy_on_write.mlir
index 133eda0..9ca52ce 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_copy_on_write.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/materialize_copy_on_write.mlir
@@ -1,19 +1,19 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-stream-materialize-copy-on-write))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module( util.func(iree-stream-materialize-copy-on-write))' %s | FileCheck %s
// Tests that block arguments (including function arguments) are always cloned.
// Until a whole-program analysis runs we don't know their semantics.
// CHECK-LABEL: @blockArgsNeedCopies
// CHECK-SAME: (%[[SRC:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index)
-func.func @blockArgsNeedCopies(%src: !stream.resource<*>, %size: index) -> !stream.resource<*> {
+util.func public @blockArgsNeedCopies(%src: !stream.resource<*>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
// CHECK: %[[CLONE:.+]] = stream.async.clone %[[SRC]] : !stream.resource<*>{%[[SIZE]]} -> !stream.resource<*>{%[[SIZE]]}
// CHECK: %[[FILL:.+]] = stream.async.fill %c123_i32, %[[CLONE]]{{.+}} -> %[[CLONE]]
%0 = stream.async.fill %c123_i32, %src[%c0 to %c128 for %c128] : i32 -> %src as !stream.resource<*>{%size}
- // CHECK: return %[[FILL]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[FILL]]
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -22,7 +22,7 @@
// CHECK-LABEL: @singleUseTiedOperand
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @singleUseTiedOperand(%size: index) -> !stream.resource<*> {
+util.func public @singleUseTiedOperand(%size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -37,7 +37,7 @@
// CHECK-NOT: stream.async.clone
// CHECK: stream.async.fill
%2 = stream.async.fill %c789_i32, %1[%c128 to %c256 for %c128] : i32 -> %0 as !stream.resource<*>{%size}
- return %2 : !stream.resource<*>
+ util.return %2 : !stream.resource<*>
}
// -----
@@ -46,7 +46,7 @@
// user.
// CHECK-LABEL: @multipleUsesOneUser
-func.func private @multipleUsesOneUser(%size: index) -> !stream.resource<*> {
+util.func private @multipleUsesOneUser(%size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -56,7 +56,7 @@
// CHECK-NOT: stream.async.clone
// CHECK: stream.async.dispatch
%1 = stream.async.dispatch @ex::@dispatch(%0[%c0 to %c128 for %c128], %0[%c128 to %c256 for %c128]) : (!stream.resource<*>{%size}, !stream.resource<*>{%size}) -> %0{%size}
- return %1 : !stream.resource<*>
+ util.return %1 : !stream.resource<*>
}
// -----
@@ -66,7 +66,7 @@
// CHECK-LABEL: @oneCopyPerOperation
// CHECK-SAME: (%[[SRC:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index)
-func.func @oneCopyPerOperation(%src: !stream.resource<*>, %size: index) -> !stream.resource<*> {
+util.func public @oneCopyPerOperation(%src: !stream.resource<*>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 128 : index
@@ -74,8 +74,8 @@
// CHECK-NOT: stream.async.clone
// CHECK: %[[RESULT:.+]] = stream.async.dispatch @ex::@dispatch(%[[CLONE]]{{.*}}, %[[CLONE]]{{.*}}) {{.*}} -> %[[CLONE]]{%[[SIZE]]}
%0 = stream.async.dispatch @ex::@dispatch(%src[%c0 to %c128 for %c128], %src[%c128 to %c256 for %c128]) : (!stream.resource<*>{%size}, !stream.resource<*>{%size}) -> %src{%size}
- // CHECK: return %[[RESULT]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RESULT]]
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -86,7 +86,7 @@
// CHECK-LABEL: @multiUseTiedOperand
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @multiUseTiedOperand(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @multiUseTiedOperand(%size: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -101,7 +101,7 @@
// CHECK: %[[CLONE1:.+]] = stream.async.clone %[[SPLAT]]
// CHECK: %[[FILL1:.+]] = stream.async.fill %c789_i32, %[[CLONE1]]
%2 = stream.async.fill %c789_i32, %0[%c128 to %c256 for %c128] : i32 -> %0 as !stream.resource<*>{%size}
- return %1, %2 : !stream.resource<*>, !stream.resource<*>
+ util.return %1, %2 : !stream.resource<*>, !stream.resource<*>
}
// -----
@@ -112,7 +112,7 @@
// CHECK-LABEL: @tiedCollectivesTODO
// CHECK-SAME: (%[[CHANNEL:.+]]: !stream.channel, %[[SEND_RECV:.+]]: !stream.resource<*>, %[[SEND_SIZE:.+]]: index, %[[RECV_SIZE:.+]]: index, %[[COUNT:.+]]: index)
-func.func private @tiedCollectivesTODO(%channel: !stream.channel, %send_recv: !stream.resource<*>, %send_size: index, %recv_size: index, %count: index) -> !stream.resource<*> {
+util.func private @tiedCollectivesTODO(%channel: !stream.channel, %send_recv: !stream.resource<*>, %send_size: index, %recv_size: index, %count: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
// CHECK: %[[RECV_CLONE:.+]] = stream.async.clone on(#hal.affinity.queue<[0]>) %[[SEND_RECV]]
// CHECK: %[[ALL_GATHER:.+]] = stream.async.collective<all_gather : f32>[%[[COUNT]]]
@@ -123,8 +123,8 @@
%send_recv[%c0 to %recv_size for %recv_size] :
// CHECK-SAME: !stream.resource<*>{%[[SEND_SIZE]]} -> %[[RECV_CLONE]] as !stream.resource<*>{%[[RECV_SIZE]]}
!stream.resource<*>{%send_size} -> %recv as !stream.resource<*>{%recv_size}
- // CHECK: return %[[ALL_GATHER]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[ALL_GATHER]]
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -134,7 +134,7 @@
// original contents for use by @dispatch1.
// CHECK-LABEL: @tiedDispatches
-func.func private @tiedDispatches() {
+util.func private @tiedDispatches() {
%c0_i32 = arith.constant 0 : i32
%c1_i32 = arith.constant 1 : i32
%c0 = arith.constant 0 : index
@@ -156,7 +156,7 @@
// CHECK-SAME: (!stream.resource<*>{%c40}, !stream.resource<*>{%c40}) -> %[[DISPATCH0]]{%c40}
%dispatch1 = stream.async.dispatch @ex::@dispatch1[%c1, %c1, %c1](%dispatch0[%c0 to %c40 for %c40], %splat0[%c0 to %c40 for %c40]) : (!stream.resource<*>{%c40}, !stream.resource<*>{%c40}) -> %dispatch0{%c40}
- return
+ util.return
}
// -----
@@ -165,7 +165,7 @@
// take care of them later.
// CHECK-LABEL: @blockArgMove
-func.func @blockArgMove(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<*>) {
+util.func public @blockArgMove(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -185,5 +185,5 @@
cf.cond_br %cond, ^bb1(%fill0, %bb1_1_new : !stream.resource<*>, !stream.resource<*>),
^bb2(%fill0, %bb1_1_new : !stream.resource<*>, !stream.resource<*>)
^bb2(%bb2_0: !stream.resource<*>, %bb2_1: !stream.resource<*>):
- return %bb2_0, %bb2_1 : !stream.resource<*>, !stream.resource<*>
+ util.return %bb2_0, %bb2_1 : !stream.resource<*>, !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
index 2b5adc3..c472e82 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-stream-pack-constants))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module( util.func(iree-stream-pack-constants))' %s | FileCheck %s
// This is a high level test of the structure emitted by the pass.
// Subsequent tests focus on individual components.
@@ -14,7 +14,7 @@
// CHECK-NEXT: ]>
// CHECK-LABEL: @resourceConstants
-func.func @resourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
+util.func public @resourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
%c4 = arith.constant 4 : index
%c8 = arith.constant 8 : index
%c48 = arith.constant 48 : index
@@ -48,8 +48,8 @@
// CHECK: %[[RES1:.+]] = stream.resource.subview %[[IF]]#1[%c64] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c8}
// CHECK: %[[RES2:.+]] = stream.resource.subview %[[IF]]#1[%c128] : !stream.resource<constant>{%c192} -> !stream.resource<constant>{%c48}
- // CHECK: return %[[RES0]], %[[RES1]], %[[RES2]], %[[IF]]#0
- return %0#0, %0#1, %0#2, %0#3 : !stream.resource<constant>, !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ // CHECK: util.return %[[RES0]], %[[RES1]], %[[RES2]], %[[IF]]#0
+ util.return %0#0, %0#1, %0#2, %0#3 : !stream.resource<constant>, !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
// -----
@@ -63,7 +63,7 @@
// CHECK: ]>
// CHECK-LABEL: @resourceVariables
-func.func @resourceVariables() -> (!stream.resource<variable>, !stream.resource<variable>, !stream.timepoint) {
+util.func public @resourceVariables() -> (!stream.resource<variable>, !stream.resource<variable>, !stream.timepoint) {
%c8 = arith.constant 8 : index
%c1024 = arith.constant 1024 : index
@@ -80,8 +80,8 @@
!stream.resource<variable>{%c8} = dense<[101, 102]> : tensor<2xi32>
=> !stream.timepoint
- // CHECK: return %[[RES0]], %[[RES1]], %[[READ_TIMEPOINT]]
- return %0#0, %0#1, %0#2 : !stream.resource<variable>, !stream.resource<variable>, !stream.timepoint
+ // CHECK: util.return %[[RES0]], %[[RES1]], %[[READ_TIMEPOINT]]
+ util.return %0#0, %0#1, %0#2 : !stream.resource<variable>, !stream.resource<variable>, !stream.timepoint
}
// -----
@@ -108,7 +108,7 @@
// CHECK: ]>
// CHECK-LABEL: @splitResourceConstants
-func.func @splitResourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint)
+util.func public @splitResourceConstants() -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint)
attributes {stream.resources = #splitResourceConstantsConfig} {
%c4 = arith.constant 4 : index
%c8 = arith.constant 8 : index
@@ -134,6 +134,6 @@
!stream.resource<constant>{%c8} = dense<[101, 102]> : tensor<2xi32>
=> !stream.timepoint
- // CHECK: return %[[RES0]], %[[RES1]], %[[IF1]]#0
- return %0#0, %0#1, %0#2 : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ // CHECK: util.return %[[RES0]], %[[RES1]], %[[IF1]]#0
+ util.return %0#0, %0#1, %0#2 : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
index 4f053e9..7977ed7 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
@@ -3,17 +3,17 @@
stream.executable private @ex0 {
stream.executable.export public @device_i1
builtin.module {
- // CHECK-LABEL: func.func @device_i1
+ // CHECK-LABEL: util.func public @device_i1
// CHECK-SAME: (%arg0: i32, %arg1: !stream.binding)
- func.func @device_i1(%arg0: i1 {stream.values = [true, false]}, %arg1: !stream.binding) {
+ util.func public @device_i1(%arg0: i1 {stream.values = [true, false]}, %arg1: !stream.binding) {
// CHECK-NEXT: %[[DEV_I1:.+]] = arith.trunci %arg0 {stream.values = [true, false]} : i32 to i1
// CHECK-NEXT: util.optimization_barrier %[[DEV_I1]]
util.optimization_barrier %arg0 : i1
- return
+ util.return
}
}
}
-func.func @host_i1(%arg0: i1) -> !stream.timepoint {
+util.func public @host_i1(%arg0: i1) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -25,7 +25,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -33,18 +33,18 @@
stream.executable private @ex1 {
stream.executable.export public @device_bf16
builtin.module {
- // CHECK-LABEL: func.func @device_bf16
+ // CHECK-LABEL: util.func public @device_bf16
// CHECK-SAME: (%arg0: i32, %arg1: !stream.binding)
- func.func @device_bf16(%arg0: bf16, %arg1: !stream.binding) {
+ util.func public @device_bf16(%arg0: bf16, %arg1: !stream.binding) {
// CHECK-NEXT: %[[DEV_I16:.+]] = arith.trunci %arg0 : i32 to i16
// CHECK-NEXT: %[[DEV_BF16:.+]] = arith.bitcast %[[DEV_I16]] : i16 to bf16
// CHECK-NEXT: util.optimization_barrier %[[DEV_BF16]]
util.optimization_barrier %arg0 : bf16
- return
+ util.return
}
}
}
-func.func @host_bf16(%arg0: bf16) -> !stream.timepoint {
+util.func public @host_bf16(%arg0: bf16) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -57,7 +57,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -66,20 +66,20 @@
// CHECK-LABEL: @device_i64
stream.executable.export public @device_i64
builtin.module {
- // CHECK-LABEL: func.func @device_i64
+ // CHECK-LABEL: util.func public @device_i64
// CHECK-SAME: (%[[DEV_LO32:.+]]: i32, %[[DEV_HI32:.+]]: i32, %arg2: !stream.binding)
- func.func @device_i64(%arg0: i64 {stream.values = [-1 : i64, 0x0000000200000003 : i64]}, %arg1: !stream.binding) {
+ util.func public @device_i64(%arg0: i64 {stream.values = [-1 : i64, 0x0000000200000003 : i64]}, %arg1: !stream.binding) {
// CHECK-DAG: %[[DEV_LO64:.+]] = arith.extui %[[DEV_LO32]] : i32 to i64
// CHECK-DAG: %[[DEV_HI64:.+]] = arith.extui %[[DEV_HI32]] : i32 to i64
// CHECK-DAG: %[[DEV_HISHL:.+]] = arith.shli %[[DEV_HI64]], %c32
// CHECK-DAG: %[[DEV_I64:.+]] = arith.ori %[[DEV_LO64]], %[[DEV_HISHL]] {stream.values = [-1, 8589934595]}
// CHECK-NEXT: util.optimization_barrier %[[DEV_I64]]
util.optimization_barrier %arg0 : i64
- return
+ util.return
}
}
}
-func.func @host_i64(%arg0: i64) -> !stream.timepoint {
+util.func public @host_i64(%arg0: i64) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -93,7 +93,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -109,9 +109,9 @@
stream.executable private @ex3 attributes {stream.resources = #resourceIndex32} {
stream.executable.export public @device_index_32
builtin.module {
- // CHECK-LABEL: func.func @device_index_32
+ // CHECK-LABEL: util.func public @device_index_32
// CHECK-SAME: (%[[DEV_I32:.+]]: i32, %{{.+}}: !stream.binding)
- func.func @device_index_32(%arg0: index {stream.alignment = 16 : index, stream.values = [0 : index, 1234 : index]}, %arg1: !stream.binding) {
+ util.func public @device_index_32(%arg0: index {stream.alignment = 16 : index, stream.values = [0 : index, 1234 : index]}, %arg1: !stream.binding) {
// 32-bit device size fits in a push constant:
// CHECK: %[[DEV_INDEX:.+]] = arith.index_castui %[[DEV_I32]] {
// CHECK-SAME: stream.alignment = 16 : index
@@ -119,11 +119,11 @@
// CHECK-SAME: } : i32 to index
// CHECK: util.optimization_barrier %[[DEV_INDEX]]
util.optimization_barrier %arg0 : index
- return
+ util.return
}
}
}
-func.func @host_index_32(%arg0: index) -> !stream.timepoint {
+util.func public @host_index_32(%arg0: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -138,7 +138,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -154,9 +154,9 @@
stream.executable private @ex4 attributes {stream.resources = #resourceIndex64} {
stream.executable.export public @device_index_64
builtin.module {
- // CHECK-LABEL: func.func @device_index_64
+ // CHECK-LABEL: util.func public @device_index_64
// CHECK-SAME: (%[[DEV_LO32:.+]]: i32, %[[DEV_HI32:.+]]: i32, %{{.+}}: !stream.binding)
- func.func @device_index_64(%arg0: index {stream.alignment = 16 : index, stream.values = [0 : index, 1234 : index]}, %arg1: !stream.binding) {
+ util.func public @device_index_64(%arg0: index {stream.alignment = 16 : index, stream.values = [0 : index, 1234 : index]}, %arg1: !stream.binding) {
// 64-bit device size requires joining after it was split into lo/hi:
// CHECK-DAG: %[[DEV_LO64:.+]] = arith.extui %[[DEV_LO32]] : i32 to i64
// CHECK-DAG: %[[DEV_HI64:.+]] = arith.extui %[[DEV_HI32]] : i32 to i64
@@ -168,11 +168,11 @@
// CHECK-SAME: } : i64 to index
// CHECK: util.optimization_barrier %[[DEV_INDEX]]
util.optimization_barrier %arg0 : index
- return
+ util.return
}
}
}
-func.func @host_index_64(%arg0: index) -> !stream.timepoint {
+util.func public @host_index_64(%arg0: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -190,7 +190,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -199,19 +199,19 @@
// CHECK-LABEL: @device_complex_f32
stream.executable.export public @device_complex_f32
builtin.module {
- // CHECK-LABEL: func.func @device_complex_f32
+ // CHECK-LABEL: util.func public @device_complex_f32
// CHECK-SAME: (%[[DEV_REAL_I32:.+]]: i32, %[[DEV_IMAG_I32:.+]]: i32, %arg2: !stream.binding)
- func.func @device_complex_f32(%arg0: complex<f32>, %arg1: !stream.binding) {
+ util.func public @device_complex_f32(%arg0: complex<f32>, %arg1: !stream.binding) {
// CHECK-DAG: %[[DEV_REAL_F32:.+]] = arith.bitcast %[[DEV_REAL_I32]] : i32 to f32
// CHECK-DAG: %[[DEV_IMAG_F32:.+]] = arith.bitcast %[[DEV_IMAG_I32]] : i32 to f32
// CHECK-DAG: %[[DEV_COMPLEX:.+]] = complex.create %[[DEV_REAL_F32]], %[[DEV_IMAG_F32]]
// CHECK-NEXT: util.optimization_barrier %[[DEV_COMPLEX]]
util.optimization_barrier %arg0 : complex<f32>
- return
+ util.return
}
}
}
-func.func @host_complex_f32(%arg0: complex<f32>) -> !stream.timepoint {
+util.func public @host_complex_f32(%arg0: complex<f32>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -226,7 +226,7 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %1 : !stream.timepoint
+ util.return %1 : !stream.timepoint
}
// -----
@@ -235,19 +235,19 @@
// CHECK-LABEL: @device_complex_f64_bitcast
stream.executable.export public @device_complex_f64_bitcast
builtin.module {
- // CHECK-LABEL: func.func @device_complex_f64
+ // CHECK-LABEL: util.func public @device_complex_f64
// CHECK-SAME: (%{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %arg4: !stream.binding)
- func.func @device_complex_f64_bitcast(%arg0: complex<f64>, %arg1: !stream.binding) {
+ util.func public @device_complex_f64_bitcast(%arg0: complex<f64>, %arg1: !stream.binding) {
// CHECK-COUNT-2: arith.bitcast {{.*}} : i64 to f64
// CHECK: %[[DEV_COMPLEX:.+]] = complex.create
// CHECK-NEXT: util.optimization_barrier %[[DEV_COMPLEX]]
util.optimization_barrier %arg0 : complex<f64>
- return
+ util.return
}
}
}
-// CHECK-LABEL: func.func @host_complex_bitcast
-func.func @host_complex_bitcast(%arg0: complex<f64>) -> !stream.timepoint {
+// CHECK-LABEL: util.func public @host_complex_bitcast
+util.func public @host_complex_bitcast(%arg0: complex<f64>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -269,5 +269,5 @@
wo %arg1[%c0 for %c128] : !stream.resource<external>{%c128}
}
} => !stream.timepoint
- return %2 : !stream.timepoint
+ util.return %2 : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
index 23934c4..3e7e384 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
@@ -13,7 +13,7 @@
util.global private mutable @constantGlobal : !stream.resource<constant>
// CHECK-LABEL: @globalLoad
-func.func private @globalLoad() {
+util.func private @globalLoad() {
// CHECK-NEXT: %[[RESOURCE:.+]] = util.global.load @constantGlobal : !stream.resource<constant>
// CHECK-NEXT: %[[STORAGE_SIZE:.+]] = util.global.load @constantGlobal__storage_size : index
// CHECK-NEXT: %[[OFFSET:.+]] = util.global.load @constantGlobal__offset : index
@@ -22,7 +22,7 @@
%0 = util.global.load @constantGlobal : !stream.resource<constant>
// CHECK-NEXT: util.optimization_barrier %[[SUBVIEW]]
util.optimization_barrier %0 : !stream.resource<constant>
- return
+ util.return
}
// -----
@@ -39,13 +39,13 @@
// CHECK-LABEL: @globalStore
// CHECK-SAME: (%[[RESOURCE:.+]]: !stream.resource<variable>, %[[STORAGE_SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
-func.func private @globalStore(%resource: !stream.resource<variable>) {
+util.func private @globalStore(%resource: !stream.resource<variable>) {
// CHECK: util.global.store %[[RESOURCE]], @mutableGlobal : !stream.resource<variable>
// CHECK: util.global.store %[[STORAGE_SIZE]], @mutableGlobal__storage_size : index
// CHECK: util.global.store %[[OFFSET]], @mutableGlobal__offset : index
// CHECK: util.global.store %[[LENGTH]], @mutableGlobal__length : index
util.global.store %resource, @mutableGlobal : !stream.resource<variable>
- return
+ util.return
}
// -----
@@ -57,7 +57,7 @@
// CHECK-LABEL: @funcArgs
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<external>, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !stream.resource<transient>, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @funcArgs(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
+util.func private @funcArgs(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
// CHECK-NEXT: %[[SUBVIEW0:.+]] = stream.resource.subview %[[RESOURCE0]][%[[OFFSET0]]] : !stream.resource<external>{%[[STORAGE_SIZE0]]} -> !stream.resource<external>{%[[LENGTH0]]}
// CHECK-NEXT: %[[SUBVIEW1:.+]] = stream.resource.subview %[[RESOURCE1]][%[[OFFSET1]]] : !stream.resource<transient>{%[[STORAGE_SIZE1]]} -> !stream.resource<transient>{%[[LENGTH1]]}
@@ -65,7 +65,7 @@
util.optimization_barrier %resource0 : !stream.resource<external>
// CHECK-NEXT: util.optimization_barrier %[[SUBVIEW1]]
util.optimization_barrier %resource1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -78,13 +78,13 @@
// CHECK-LABEL: @funcResults
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<external>, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !stream.resource<transient>, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
// CHECK-SAME: -> (!stream.resource<external>, index, index, index, !stream.resource<transient>, index, index, index)
-func.func private @funcResults(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
+util.func private @funcResults(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. Since the
// return should consume the subview that was inserted we expect to directly
// use the function arguments.
- // CHECK: return %[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]], %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]]
- return %resource0, %resource1 : !stream.resource<external>, !stream.resource<transient>
+ // CHECK: util.return %[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]], %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]]
+ util.return %resource0, %resource1 : !stream.resource<external>, !stream.resource<transient>
}
// -----
@@ -97,15 +97,15 @@
// CHECK-LABEL: @caller
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<external>, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !stream.resource<transient>, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @caller(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
+util.func private @caller(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. The call
// consumes the subviews and we expect the args to be passed directly.
- // CHECK: %[[RET:.+]]:8 = call @callee(%[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]],
- // CHECK-SAME: %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]])
+ // CHECK: %[[RET:.+]]:8 = util.call @callee(%[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]],
+ // CHECK-SAME: %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]])
// CHECK-SAME: : (!stream.resource<external>, index, index, index, !stream.resource<transient>, index, index, index)
// CHECK-SAME: -> (!stream.resource<external>, index, index, index, !stream.resource<transient>, index, index, index)
- %0:2 = call @callee(%resource0, %resource1) : (!stream.resource<external>, !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>)
+ %0:2 = util.call @callee(%resource0, %resource1) : (!stream.resource<external>, !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>)
// CHECK-NEXT: %[[RET_SUBVIEW0:.+]] = stream.resource.subview %[[RET]]#0[%[[RET]]#2] : !stream.resource<external>{%[[RET]]#1} -> !stream.resource<external>{%[[RET]]#3}
// CHECK-NEXT: %[[RET_SUBVIEW1:.+]] = stream.resource.subview %[[RET]]#4[%[[RET]]#6] : !stream.resource<transient>{%[[RET]]#5} -> !stream.resource<transient>{%[[RET]]#7}
@@ -114,11 +114,11 @@
// CHECK-NEXT: util.optimization_barrier %[[RET_SUBVIEW1]] : !stream.resource<transient>
util.optimization_barrier %0#1 : !stream.resource<transient>
- return
+ util.return
}
-func.func private @callee(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
- return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
+util.func private @callee(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
+ util.return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
}
// -----
@@ -130,7 +130,7 @@
// CHECK-LABEL: @br
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<external>, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !stream.resource<transient>, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @br(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
+util.func private @br(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. The branch
// consumes the unready resources and we expect the args to be passed directly
// to the cf.br.
@@ -149,7 +149,7 @@
// CHECK-NEXT: util.optimization_barrier %[[BB1_SUBVIEW1]]
util.optimization_barrier %bb1_resource1 : !stream.resource<transient>
- return
+ util.return
}
@@ -159,7 +159,7 @@
// CHECK-LABEL: @switch
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<external>, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !stream.resource<transient>, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @switch(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
+util.func private @switch(%resource0: !stream.resource<external>, %resource1: !stream.resource<transient>) {
%flag = arith.constant 1 : i32
// CHECK: cf.switch
@@ -182,5 +182,5 @@
// CHECK-NEXT: util.optimization_barrier %[[BB1_SUBVIEW1]]
util.optimization_barrier %bb1_resource1 : !stream.resource<transient>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
index a1fb5f1..28b64c4 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
@@ -10,7 +10,7 @@
util.global private mutable @constantGlobal : !stream.resource<constant>
// CHECK-LABEL: @globalLoad
-func.func @globalLoad() {
+util.func private @globalLoad() {
// CHECK-NEXT: %[[TIMEPOINT:.+]] = util.global.load @constantGlobal__timepoint : !stream.timepoint
// CHECK-NEXT: %[[UNREADY:.+]] = util.global.load @constantGlobal : !stream.resource<constant>
// CHECK-NEXT: %[[SIZE:.+]] = stream.resource.size %[[UNREADY]]
@@ -18,7 +18,7 @@
%0 = util.global.load @constantGlobal : !stream.resource<constant>
// CHECK-NEXT: util.optimization_barrier %[[VALUE]]
util.optimization_barrier %0 : !stream.resource<constant>
- return
+ util.return
}
// -----
@@ -34,12 +34,12 @@
util.global private mutable @mutableGlobal : !stream.resource<variable>
// CHECK-LABEL: @globalStore
-// CHECK-SAME: (%[[TIMEPOINT:.+]]: !stream.timepoint, %[[UNREADY:.+]]: !stream.resource<variable>)
-func.func private @globalStore(%arg0: !stream.resource<variable>) {
+// CHECK-SAME: (%[[UNREADY:.+]]: !stream.resource<variable>, %[[TIMEPOINT:.+]]: !stream.timepoint)
+util.func private @globalStore(%arg0: !stream.resource<variable>) {
// CHECK: util.global.store %[[TIMEPOINT]], @mutableGlobal__timepoint : !stream.timepoint
// CHECK-NEXT: util.global.store %[[UNREADY]], @mutableGlobal : !stream.resource<variable>
util.global.store %arg0, @mutableGlobal : !stream.resource<variable>
- return
+ util.return
}
// -----
@@ -50,9 +50,9 @@
// This rotates waits from callers into callees.
// CHECK-LABEL: @funcArgs
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @funcArgs(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @funcArgs(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
// CHECK-NEXT: %[[SIZE0:.+]] = stream.resource.size %[[UNREADY0]] : !stream.resource<external>
// CHECK-NEXT: %[[READY0:.+]] = stream.timepoint.await %[[TIMEPOINT0]] => %[[UNREADY0]] : !stream.resource<external>{%[[SIZE0]]}
// CHECK-NEXT: %[[SIZE1:.+]] = stream.resource.size %[[UNREADY1]] : !stream.resource<transient>
@@ -62,7 +62,8 @@
util.optimization_barrier %arg0 : !stream.resource<external>
// CHECK-NEXT: util.optimization_barrier %[[READY1]]
util.optimization_barrier %arg1 : !stream.resource<transient>
- return
+
+ util.return
}
// -----
@@ -73,15 +74,15 @@
// This rotates waits from callees into callers.
// CHECK-LABEL: @funcResults
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @funcResults(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @funcResults(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. Since the
// return should consume the await that was inserted we expect to directly use
// the function arguments.
- // CHECK: return %[[TIMEPOINT0]], %[[UNREADY0]], %[[TIMEPOINT1]], %[[UNREADY1]]
- return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
+ // CHECK: util.return %[[UNREADY0]], %[[TIMEPOINT0]], %[[UNREADY1]], %[[TIMEPOINT1]]
+ util.return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
}
// -----
@@ -94,31 +95,31 @@
// callees to callers.
// CHECK-LABEL: @caller
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @caller(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @caller(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. The call
// consumes the unready resources and we expect the args to be passed
// directly.
- // CHECK: %[[RET:.+]]:4 = call @callee(%[[TIMEPOINT0]], %[[UNREADY0]], %[[TIMEPOINT1]], %[[UNREADY1]])
- // CHECK-SAME: : (!stream.timepoint, !stream.resource<external>, !stream.timepoint, !stream.resource<transient>) -> (!stream.timepoint, !stream.resource<external>, !stream.timepoint, !stream.resource<transient>)
- %0:2 = call @callee(%arg0, %arg1) : (!stream.resource<external>, !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>)
- // CHECK-NEXT: %[[RET_SIZE0:.+]] = stream.resource.size %[[RET]]#1 : !stream.resource<external>
- // CHECK-NEXT: %[[RET_READY0:.+]] = stream.timepoint.await %[[RET]]#0 => %[[RET]]#1 : !stream.resource<external>{%[[RET_SIZE0]]}
- // CHECK-NEXT: %[[RET_SIZE1:.+]] = stream.resource.size %[[RET]]#3 : !stream.resource<transient>
- // CHECK-NEXT: %[[RET_READY1:.+]] = stream.timepoint.await %[[RET]]#2 => %[[RET]]#3 : !stream.resource<transient>{%[[RET_SIZE1]]}
+ // CHECK: %[[RET:.+]]:4 = util.call @callee(%[[UNREADY0]], %[[TIMEPOINT0]], %[[UNREADY1]], %[[TIMEPOINT1]])
+ // CHECK-SAME: : (!stream.resource<external>, !stream.timepoint, !stream.resource<transient>, !stream.timepoint) -> (!stream.resource<external>, !stream.timepoint, !stream.resource<transient>, !stream.timepoint)
+ %0:2 = util.call @callee(%arg0, %arg1) : (!stream.resource<external>, !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>)
+ // CHECK-NEXT: %[[RET_SIZE0:.+]] = stream.resource.size %[[RET]]#0 : !stream.resource<external>
+ // CHECK-NEXT: %[[RET_READY0:.+]] = stream.timepoint.await %[[RET]]#1 => %[[RET]]#0 : !stream.resource<external>{%[[RET_SIZE0]]}
+ // CHECK-NEXT: %[[RET_SIZE1:.+]] = stream.resource.size %[[RET]]#2 : !stream.resource<transient>
+ // CHECK-NEXT: %[[RET_READY1:.+]] = stream.timepoint.await %[[RET]]#3 => %[[RET]]#2 : !stream.resource<transient>{%[[RET_SIZE1]]}
// CHECK-NEXT: util.optimization_barrier %[[RET_READY0]] : !stream.resource<external>
util.optimization_barrier %0#0 : !stream.resource<external>
// CHECK-NEXT: util.optimization_barrier %[[RET_READY1]] : !stream.resource<transient>
util.optimization_barrier %0#1 : !stream.resource<transient>
- return
+ util.return
}
-func.func private @callee(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
- return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
+util.func private @callee(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) -> (!stream.resource<external>, !stream.resource<transient>) {
+ util.return %arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>
}
// -----
@@ -129,18 +130,18 @@
// This rotates waits on branch operands into successors.
// CHECK-LABEL: @br
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @br(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @br(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. The branch
// consumes the unready resources and we expect the args to be passed directly
// to the cf.br.
- // CHECK: cf.br ^bb1(%[[TIMEPOINT0]], %[[UNREADY0]], %[[TIMEPOINT1]], %[[UNREADY1]]
+ // CHECK: cf.br ^bb1(%[[UNREADY0]], %[[TIMEPOINT0]], %[[UNREADY1]], %[[TIMEPOINT1]]
cf.br ^bb1(%arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>)
-// CHECK-NEXT: ^bb1(%[[BB1_TIMEPOINT0:.+]]: !stream.timepoint, %[[BB1_UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[BB1_TIMEPOINT1:.+]]: !stream.timepoint, %[[BB1_UNREADY1:.+]]: !stream.resource<transient>):
+// CHECK-NEXT: ^bb1(%[[BB1_UNREADY0:.+]]: !stream.resource<external>, %[[BB1_TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[BB1_UNREADY1:.+]]: !stream.resource<transient>, %[[BB1_TIMEPOINT1:.+]]: !stream.timepoint):
^bb1(%bb1_arg0: !stream.resource<external>, %bb1_arg1: !stream.resource<transient>):
// CHECK-NEXT: %[[SIZE0:.+]] = stream.resource.size %[[BB1_UNREADY0]] : !stream.resource<external>
// CHECK-NEXT: %[[READY0:.+]] = stream.timepoint.await %[[BB1_TIMEPOINT0]] => %[[BB1_UNREADY0]] : !stream.resource<external>{%8}
@@ -151,7 +152,7 @@
util.optimization_barrier %bb1_arg0 : !stream.resource<external>
// CHECK-NEXT: util.optimization_barrier %[[READY1]]
util.optimization_barrier %bb1_arg1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -159,21 +160,21 @@
// Tests switch terminator expansion similar to a branch test above.
// CHECK-LABEL: @switch
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @switch(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @switch(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
%flag = arith.constant 1 : i32
// CHECK: cf.switch
- // CHECK-NEXT: default: ^bb1(%[[TIMEPOINT0]], %[[UNREADY0]], %[[TIMEPOINT1]], %[[UNREADY1]]
- // CHECK-NEXT: 0: ^bb1(%[[TIMEPOINT0]], %[[UNREADY0]], %[[TIMEPOINT1]], %[[UNREADY1]]
+ // CHECK-NEXT: default: ^bb1(%[[UNREADY0]], %[[TIMEPOINT0]], %[[UNREADY1]], %[[TIMEPOINT1]]
+ // CHECK-NEXT: 0: ^bb1(%[[UNREADY0]], %[[TIMEPOINT0]], %[[UNREADY1]], %[[TIMEPOINT1]]
cf.switch %flag : i32, [
default: ^bb1(%arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>),
0: ^bb1(%arg0, %arg1 : !stream.resource<external>, !stream.resource<transient>)
]
-// CHECK: ^bb1(%[[BB1_TIMEPOINT0:.+]]: !stream.timepoint, %[[BB1_UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[BB1_TIMEPOINT1:.+]]: !stream.timepoint, %[[BB1_UNREADY1:.+]]: !stream.resource<transient>):
+// CHECK: ^bb1(%[[BB1_UNREADY0:.+]]: !stream.resource<external>, %[[BB1_TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[BB1_UNREADY1:.+]]: !stream.resource<transient>, %[[BB1_TIMEPOINT1:.+]]: !stream.timepoint):
^bb1(%bb1_arg0: !stream.resource<external>, %bb1_arg1: !stream.resource<transient>):
// CHECK-NEXT: %[[SIZE0:.+]] = stream.resource.size %[[BB1_UNREADY0]] : !stream.resource<external>
// CHECK-NEXT: %[[READY0:.+]] = stream.timepoint.await %[[BB1_TIMEPOINT0]] => %[[BB1_UNREADY0]] : !stream.resource<external>{%8}
@@ -184,7 +185,7 @@
util.optimization_barrier %bb1_arg0 : !stream.resource<external>
// CHECK-NEXT: util.optimization_barrier %[[READY1]]
util.optimization_barrier %bb1_arg1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -196,9 +197,9 @@
// This rotates waits on producers to waits on consumers.
// CHECK-LABEL: @asyncExecuteConsume
-// CHECK-SAME: (%[[TIMEPOINT0:.+]]: !stream.timepoint, %[[UNREADY0:.+]]: !stream.resource<external>,
-// CHECK-SAME: %[[TIMEPOINT1:.+]]: !stream.timepoint, %[[UNREADY1:.+]]: !stream.resource<transient>)
-func.func private @asyncExecuteConsume(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
+// CHECK-SAME: (%[[UNREADY0:.+]]: !stream.resource<external>, %[[TIMEPOINT0:.+]]: !stream.timepoint,
+// CHECK-SAME: %[[UNREADY1:.+]]: !stream.resource<transient>, %[[TIMEPOINT1:.+]]: !stream.timepoint)
+util.func private @asyncExecuteConsume(%arg0: !stream.resource<external>, %arg1: !stream.resource<transient>) {
// NOTE: there will be extra stuff here from the arg insertion. The execution
// region consumes the unready resources and we expect the args to be captured
// directly.
@@ -219,5 +220,6 @@
%ready_results:2 = stream.timepoint.await %results_timepoint => %results#0, %results#1 : !stream.resource<external>{%arg0_size}, !stream.resource<transient>{%arg1_size}
util.optimization_barrier %ready_results#0 : !stream.resource<external>
util.optimization_barrier %ready_results#1 : !stream.resource<transient>
- return
+
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/refine_usage.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/refine_usage.mlir
index 7276441..1815518 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/refine_usage.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/refine_usage.mlir
@@ -7,25 +7,25 @@
// CHECK-LABEL: @propagateFuncCallee
// CHECK-SAME: (%[[ARG:.+]]: !stream.resource<external>, %[[SIZE:.+]]: index) -> !stream.resource<external>
-func.func private @propagateFuncCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
+util.func private @propagateFuncCallee(%arg: !stream.resource<*>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
// CHECK: stream.async.fill {{.+}} !stream.resource<external>
%fill = stream.async.fill %c123_i32, %arg[%c0 to %c128 for %c128] : i32 -> %0 as !stream.resource<*>{%size}
- // CHECK: return {{.+}} : !stream.resource<external>
- return %fill : !stream.resource<*>
+ // CHECK: util.return {{.+}} : !stream.resource<external>
+ util.return %fill : !stream.resource<*>
}
// CHECK: @propagateFuncCaller
// CHECK-SAME: -> !stream.resource<external>
-func.func @propagateFuncCaller(%size: index) -> !stream.resource<*> {
+util.func public @propagateFuncCaller(%size: index) -> !stream.resource<*> {
%c123_i32 = arith.constant 123 : i32
// CHECK: stream.async.splat {{.+}} -> !stream.resource<external>
%splat = stream.async.splat %c123_i32 : i32 -> !stream.resource<*>{%size}
// CHECK: call @propagateFuncCallee({{.+}}) : (!stream.resource<external>, index) -> !stream.resource<external>
- %result = call @propagateFuncCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
- // CHECK: return {{.+}} : !stream.resource<external>
- return %result : !stream.resource<*>
+ %result = util.call @propagateFuncCallee(%splat, %size) : (!stream.resource<*>, index) -> !stream.resource<*>
+ // CHECK: util.return {{.+}} : !stream.resource<external>
+ util.return %result : !stream.resource<*>
}
// -----
@@ -34,7 +34,7 @@
// and the type changes we don't explode.
// CHECK-LABEL: @transitionTypesAcrossTies
-func.func @transitionTypesAcrossTies() -> !hal.buffer_view {
+util.func public @transitionTypesAcrossTies() -> !hal.buffer_view {
%c4 = arith.constant 4 : index
%c255_i32 = arith.constant 255 : i32
// CHECK: %[[SPLAT:.+]] = stream.async.splat {{.+}} -> !stream.resource<external>
@@ -43,7 +43,7 @@
%1 = stream.async.transfer %0 : !stream.resource<*>{%c4} -> !stream.resource<external>{%c4}
// CHECK: stream.tensor.export %[[SPLAT]] : tensor<f32> in !stream.resource<external>{%c4} -> !hal.buffer_view
%2 = stream.tensor.export %1 : tensor<f32> in !stream.resource<external>{%c4} -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// -----
@@ -58,7 +58,7 @@
// CHECK-LABEL: @propagateBlocks
// CHECK-SAME: (%[[COND:.+]]: i1, {{.+}}) -> (!stream.resource<transient>, !stream.resource<external>)
-func.func private @propagateBlocks(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<external>) {
+util.func private @propagateBlocks(%cond: i1, %size: index) -> (!stream.resource<*>, !stream.resource<external>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c123_i32 = arith.constant 123 : i32
@@ -89,8 +89,8 @@
^bb2(%bb2_0: !stream.resource<*>, %bb2_1: !stream.resource<*>):
// CHECK-NOT: stream.async.transfer
%external_transfer = stream.async.transfer %bb2_1 : !stream.resource<*>{%size} -> !stream.resource<external>{%size}
- // CHECK: return %[[BB2_ARG0]], %[[BB2_ARG1]] : !stream.resource<transient>, !stream.resource<external>
- return %bb2_0, %external_transfer : !stream.resource<*>, !stream.resource<external>
+ // CHECK: util.return %[[BB2_ARG0]], %[[BB2_ARG1]] : !stream.resource<transient>, !stream.resource<external>
+ util.return %bb2_0, %external_transfer : !stream.resource<*>, !stream.resource<external>
}
// -----
@@ -101,15 +101,15 @@
// CHECK-LABEL: @conflictResolution
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG0:.+]]: !stream.resource<transient>, %[[ARG1:.+]]: !stream.resource<external>, %[[SIZE:.+]]: index)
// CHECK-SAME: -> !stream.resource<external>
-func.func @conflictResolution(%cond: i1, %arg0: !stream.resource<transient>, %arg1: !stream.resource<external>, %size: index) -> !stream.resource<*> {
+util.func public @conflictResolution(%cond: i1, %arg0: !stream.resource<transient>, %arg1: !stream.resource<external>, %size: index) -> !stream.resource<*> {
// CHECK: %[[ARG0_EXT:.+]] = stream.async.transfer %[[ARG0]]
%arg0_any = stream.async.transfer %arg0 : !stream.resource<transient>{%size} -> !stream.resource<*>{%size}
// CHECK-NOT: stream.async.transfer %[[ARG1]]
%arg1_any = stream.async.transfer %arg1 : !stream.resource<external>{%size} -> !stream.resource<*>{%size}
// CHECK: %[[RET:.+]] = arith.select %[[COND]], %[[ARG0_EXT]], %[[ARG1]] : !stream.resource<external>
%0 = arith.select %cond, %arg0_any, %arg1_any : !stream.resource<*>
- // CHECK: return %[[RET]] : !stream.resource<external>
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[RET]] : !stream.resource<external>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -122,7 +122,7 @@
// CHECK-LABEL: @transferResolution
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<constant>, %[[SIZE:.+]]: index)
// CHECK-SAME: -> !stream.resource<external>
-func.func @transferResolution(%arg0: !stream.resource<constant>, %size: index) -> !stream.resource<*> {
+util.func public @transferResolution(%arg0: !stream.resource<constant>, %size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// CHECK: %[[ARG0_EXT:.+]] = stream.async.transfer %[[ARG0]] : !stream.resource<constant>{%[[SIZE]]} -> !stream.resource<external>{%[[SIZE]]}
@@ -130,7 +130,7 @@
// CHECK: %[[RET0:.+]] = stream.async.dispatch @ex::@dispatch[%c1, %c1, %c1](%[[ARG0_EXT]][%c0 to %[[SIZE]] for %[[SIZE]]]) : (!stream.resource<external>{%[[SIZE]]}) -> %[[ARG0_EXT]]{%[[SIZE]]}
%ret0_any = stream.async.dispatch @ex::@dispatch[%c1, %c1, %c1](%arg0_any[%c0 to %size for %size]) : (!stream.resource<*>{%size}) -> %arg0_any{%size}
// return %[[RET0]] : !stream.resource<external>
- return %ret0_any : !stream.resource<*>
+ util.return %ret0_any : !stream.resource<*>
}
// -----
@@ -139,14 +139,14 @@
// CHECK-LABEL: @transferElision
// CHECK-SAME: (%[[SIZE:.+]]: index) -> !stream.resource<external>
-func.func @transferElision(%size: index) -> !stream.resource<external> {
+util.func public @transferElision(%size: index) -> !stream.resource<external> {
// CHECK: %[[ALLOCA:.+]] = stream.async.alloca
%alloca = stream.async.alloca : !stream.resource<constant>{%size}
%transfer_any = stream.async.transfer %alloca : !stream.resource<constant>{%size} -> !stream.resource<*>{%size}
// CHECK: %[[TRANSFER_EXTERNAL:.+]] = stream.async.transfer %[[ALLOCA]] : !stream.resource<constant>{%[[SIZE]]} -> !stream.resource<external>{%[[SIZE]]}
%transfer_external = stream.async.transfer %transfer_any : !stream.resource<*>{%size} -> !stream.resource<external>{%size}
- // CHECK: return %[[TRANSFER_EXTERNAL]]
- return %transfer_external : !stream.resource<external>
+ // CHECK: util.return %[[TRANSFER_EXTERNAL]]
+ util.return %transfer_external : !stream.resource<external>
}
// -----
@@ -158,25 +158,25 @@
// CHECK-LABEL: @globalLoad()
// CHECK-SAME: -> !stream.resource<variable>
-func.func private @globalLoad() -> !stream.resource<*> {
+util.func private @globalLoad() -> !stream.resource<*> {
// CHECK: %[[VALUE:.+]] = util.global.load @variable : !stream.resource<variable>
%value = util.global.load @variable : !stream.resource<variable>
%size = util.global.load @variable__size : index
// CHECK-NOT: stream.async.transfer
%0 = stream.async.transfer %value : !stream.resource<variable>{%size} -> !stream.resource<*>{%size}
- // CHECK: return %[[VALUE]]
- return %0 : !stream.resource<*>
+ // CHECK: util.return %[[VALUE]]
+ util.return %0 : !stream.resource<*>
}
// CHECK-LABEL: @globalStore
// CHECK-SAME: (%[[VALUE:.+]]: !stream.resource<variable>, %[[SIZE:.+]]: index)
-func.func private @globalStore(%value: !stream.resource<*>, %size: index) {
+util.func private @globalStore(%value: !stream.resource<*>, %size: index) {
// CHECK-NOT: stream.async.transfer
%0 = stream.async.transfer %value : !stream.resource<*>{%size} -> !stream.resource<variable>{%size}
// CHECK: util.global.store %[[VALUE]], @variable : !stream.resource<variable>
util.global.store %0, @variable : !stream.resource<variable>
util.global.store %size, @variable__size : index
- return
+ util.return
}
// -----
@@ -184,7 +184,7 @@
// Tests that explicit resource allocations are refined.
// CHECK-LABEL: @explicitAlloc
-func.func @explicitAlloc() -> !hal.buffer_view {
+util.func public @explicitAlloc() -> !hal.buffer_view {
%c0 = arith.constant 0 : index
// CHECK: %[[ALLOC:.+]] = stream.resource.alloc : !stream.resource<external>{%c0}
%0 = stream.resource.alloc : !stream.resource<*>{%c0}
@@ -192,7 +192,7 @@
%1 = stream.async.transfer %0 : !stream.resource<*>{%c0} -> !stream.resource<external>{%c0}
// CHECK: stream.tensor.export %[[ALLOC]] : tensor<f32> in !stream.resource<external>{%c0} -> !hal.buffer_view
%2 = stream.tensor.export %1 : tensor<f32> in !stream.resource<external>{%c0} -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// -----
@@ -200,7 +200,7 @@
// Tests that async allocations that escape are turned into non-transient allocs.
// CHECK-LABEL: @escapingAlloca
-func.func @escapingAlloca() -> !hal.buffer_view {
+util.func public @escapingAlloca() -> !hal.buffer_view {
%c123 = arith.constant 123 : index
// CHECK: %[[ALLOCA:.+]] = stream.async.alloca : !stream.resource<external>{%c123}
%0 = stream.async.alloca : !stream.resource<*>{%c123}
@@ -208,13 +208,13 @@
%1 = stream.async.transfer %0 : !stream.resource<*>{%c123} -> !stream.resource<external>{%c123}
// CHECK: stream.tensor.export %[[ALLOCA]] : tensor<f32> in !stream.resource<external>{%c123} -> !hal.buffer_view
%2 = stream.tensor.export %1 : tensor<f32> in !stream.resource<external>{%c123} -> !hal.buffer_view
- return %2 : !hal.buffer_view
+ util.return %2 : !hal.buffer_view
}
// -----
// CHECK-LABEL: @testIf
-func.func @testIf(%arg0: i1, %arg1: !stream.resource<*>, %arg2: !stream.resource<*>) -> !stream.resource<*> {
+util.func public @testIf(%arg0: i1, %arg1: !stream.resource<*>, %arg2: !stream.resource<*>) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
// CHECK: %[[IF:.+]] = scf.if
@@ -233,13 +233,13 @@
// CHECK-SAME: !stream.resource<external>
scf.yield %arg1 : !stream.resource<*>
}
- return %if : !stream.resource<*>
+ util.return %if : !stream.resource<*>
}
// -----
// CHECK: @testWhile
-func.func @testWhile(%arg0: i32, %arg1: !stream.resource<*>) -> (i32, !stream.resource<*>) {
+util.func public @testWhile(%arg0: i32, %arg1: !stream.resource<*>) -> (i32, !stream.resource<*>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : i32
%c4 = arith.constant 4 : index
@@ -260,8 +260,8 @@
// CHECK-SAME: !stream.resource<external>
scf.yield %add, %disp : i32, !stream.resource<*>
}
- // CHECK: return %[[IF]]#0, %[[IF]]#1 : i32, !stream.resource<external>
- return %while#0, %while#1 : i32, !stream.resource<*>
+ // CHECK: util.return %[[IF]]#0, %[[IF]]#1 : i32, !stream.resource<external>
+ util.return %while#0, %while#1 : i32, !stream.resource<*>
}
// -----
@@ -269,7 +269,7 @@
// CHECK-LABEL: @testWhileRecurse
// CHECK-SAME: %[[ARG0:.+]]: !stream.resource<external>
// CHECK-SAME: -> !stream.resource<external>
-func.func @testWhileRecurse(%arg0 : !stream.resource<*>) -> !stream.resource<external> {
+util.func public @testWhileRecurse(%arg0 : !stream.resource<*>) -> !stream.resource<external> {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
// CHECK-DAG: %[[C4:.+]] = arith.constant 4
@@ -304,8 +304,8 @@
}
%transfer = stream.async.transfer %while#0 : !stream.resource<*>{%while#1} -> !stream.resource<external>{%while#1}
- // CHECK: return %[[WHILE]]#0
- return %transfer : !stream.resource<external>
+ // CHECK: util.return %[[WHILE]]#0
+ util.return %transfer : !stream.resource<external>
}
// -----
@@ -313,7 +313,7 @@
// CHECK-LABEL: @testForOp
// CHECK-SAME: %[[ARG0:.+]]: index
// CHECK-SAME: %[[ARG1:.+]]: !stream.resource<external>
-func.func @testForOp(%arg0 : index, %arg1 : !stream.resource<*>) -> !stream.resource<external> {
+util.func public @testForOp(%arg0 : index, %arg1 : !stream.resource<*>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
@@ -340,6 +340,6 @@
%dispatch5 = stream.async.dispatch @dispatch4(%for[%c0 to %arg0 for %arg0]) : (!stream.resource<*>{%c4}) -> !stream.resource<*>{%c4}
%transfer = stream.async.transfer %dispatch5 : !stream.resource<*>{%arg0} -> !stream.resource<external>{%arg0}
- // CHECK: return %[[DISP4]] : !stream.resource<external>
- return %transfer : !stream.resource<external>
+ // CHECK: util.return %[[DISP4]] : !stream.resource<external>
+ util.return %transfer : !stream.resource<external>
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
index 39319b0..00f5c32 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
@@ -8,7 +8,7 @@
// CHECK-SAME: (%[[OPERAND_TIMEPOINT:.+]]: !stream.timepoint,
// CHECK-SAME: %[[OPERAND:.+]]: !stream.resource<transient>,
// CHECK-SAME %[[SIZE:.+]]: index)
-func.func @extractConstants(%timepoint: !stream.timepoint, %operand: !stream.resource<transient>, %size: index) {
+util.func public @extractConstants(%timepoint: !stream.timepoint, %operand: !stream.resource<transient>, %size: index) {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
@@ -52,7 +52,7 @@
util.optimization_barrier %results#2 : !stream.resource<variable>
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %results#3 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -61,7 +61,7 @@
// CHECK-LABEL: @explicitAllocs
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @explicitAllocs(%size: index) {
+util.func public @explicitAllocs(%size: index) {
// CHECK: %[[ALLOC:.+]] = stream.resource.alloc : !stream.resource<external>{%[[SIZE]]}
%alloc = stream.resource.alloc : !stream.resource<external>{%size}
// CHECK: util.optimization_barrier %[[ALLOC]]
@@ -72,7 +72,7 @@
%empty = stream.resource.alloc : !stream.resource<transient>{%c0}
// CHECK: util.optimization_barrier %[[EMPTY]]
util.optimization_barrier %empty : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -82,7 +82,7 @@
// CHECK-LABEL: @passthroughOperands
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @passthroughOperands(%operand: !stream.resource<transient>, %size: index) {
+util.func public @passthroughOperands(%operand: !stream.resource<transient>, %size: index) {
// CHECK: = stream.cmd.execute with(%[[OPERAND]] as %[[CAPTURE:.+]]: !stream.resource<transient>{%[[SIZE]]})
%result, %result_timepoint = stream.async.execute with(%operand as %capture: !stream.resource<transient>{%size}) -> (%operand as !stream.resource<transient>{%size}) {
stream.yield %capture : !stream.resource<transient>{%size}
@@ -90,14 +90,14 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
// CHECK-LABEL: @capturedOperands
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @capturedOperands(%operand: !stream.resource<transient>, %size: index) {
+util.func public @capturedOperands(%operand: !stream.resource<transient>, %size: index) {
// CHECK: stream.cmd.execute
// CHECK-SAME: => with(%[[OPERAND]] as %[[CAPTURE:.+]]: !stream.resource<transient>{%[[SIZE]]}
%result_timepoint = stream.async.execute with(%operand as %capture: !stream.resource<transient>{%size}) {
@@ -105,7 +105,7 @@
%0 = stream.async.clone %capture : !stream.resource<transient>{%size} -> !stream.resource<transient>{%size}
stream.yield
} => !stream.timepoint
- return
+ util.return
}
// -----
@@ -114,7 +114,7 @@
// CHECK-LABEL: @tiedOperands
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @tiedOperands(%operand: !stream.resource<transient>, %size: index) {
+util.func public @tiedOperands(%operand: !stream.resource<transient>, %size: index) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c255_i32 = arith.constant 255 : i32
@@ -126,7 +126,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -136,7 +136,7 @@
// CHECK-LABEL: @tiedOperandSubviews
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<external>,
// CHECK-SAME: %[[SIZE:.+]]: index, %[[OFFSET0:.+]]: index, %[[OFFSET1:.+]]: index, %[[OFFSET2:.+]]: index, %[[LENGTH0:.+]]: index, %[[LENGTH1:.+]]: index, %[[LENGTH2:.+]]: index)
-func.func @tiedOperandSubviews(%operand: !stream.resource<external>, %size: index, %offset0: index, %offset1: index, %offset2: index, %length0: index, %length1: index, %length2: index) {
+util.func public @tiedOperandSubviews(%operand: !stream.resource<external>, %size: index, %offset0: index, %offset1: index, %offset2: index, %length0: index, %length1: index, %length2: index) {
%c0 = arith.constant 0 : index
// CHECK: %[[SUBVIEW_OFFSET:.+]] = arith.addi %[[OFFSET0]], %[[OFFSET1]]
// CHECK: %[[SUBVIEW:.+]] = stream.resource.subview %[[OPERAND]][%[[SUBVIEW_OFFSET]]] {{.*}} -> !stream.resource<external>{%[[LENGTH1]]}
@@ -156,7 +156,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[SUBVIEW]]
util.optimization_barrier %result1 : !stream.resource<external>
- return
+ util.return
}
// -----
@@ -166,7 +166,7 @@
// CHECK-LABEL: @aliasPropagation
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<external>,
// CHECK-SAME: %[[SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @aliasPropagation(%operand: !stream.resource<external>, %size: index, %offset: index, %length: index) {
+util.func public @aliasPropagation(%operand: !stream.resource<external>, %size: index, %offset: index, %length: index) {
%c0 = arith.constant 0 : index
// CHECK: stream.cmd.execute with(%[[OPERAND]] as %[[CAPTURE:.+]]: !stream.resource<external>{%[[SIZE]]})
%result, %result_timepoint = stream.async.execute with(%operand as %capture: !stream.resource<external>{%size}) -> (%operand as !stream.resource<external>{%size}) {
@@ -178,7 +178,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<external>
- return
+ util.return
}
// -----
@@ -188,7 +188,7 @@
// CHECK-LABEL: @producedResults
// CHECK-SAME: (%[[SIZE0:.+]]: index, %[[SIZE1:.+]]: index)
-func.func @producedResults(%size0: index, %size1: index) {
+util.func public @producedResults(%size0: index, %size1: index) {
%c254_i32 = arith.constant 254 : i32
%c255_i32 = arith.constant 255 : i32
// CHECK: %[[PACK:.+]]:3 = stream.resource.pack slices({
@@ -214,7 +214,7 @@
util.optimization_barrier %results#0 : !stream.resource<transient>
// CHECK: util.optimization_barrier %[[SUBALLOCA1]]
util.optimization_barrier %results#1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -225,7 +225,7 @@
// CHECK-LABEL: @locals
// CHECK-SAME: (%[[SIZE0:.+]]: index, %[[SIZE1:.+]]: index, %[[AWAIT_TIMEPOINT:.+]]: !stream.timepoint)
-func.func @locals(%size0: index, %size1: index, %await_timepoint: !stream.timepoint) -> !stream.timepoint {
+util.func public @locals(%size0: index, %size1: index, %await_timepoint: !stream.timepoint) -> !stream.timepoint {
%c254_i32 = arith.constant 254 : i32
%c255_i32 = arith.constant 255 : i32
// CHECK: %[[SLICES:.+]]:3 = stream.resource.pack on(#hal.affinity.queue<[0]>) slices({
@@ -245,8 +245,8 @@
} => !stream.timepoint
// CHECK: %[[DEALLOCA_TIMEPOINT:.+]] = stream.resource.dealloca on(#hal.affinity.queue<[0]>) await(%[[EXEC_TIMEPOINT]]) => %[[ALLOCA]] : !stream.resource<transient>{%[[SLICES]]#0} => !stream.timepoint
// CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[DEALLOCA_TIMEPOINT]], %[[EXEC_TIMEPOINT]]) => !stream.timepoint
- // CHECK: return %[[JOIN]]
- return %result_timepoint : !stream.timepoint
+ // CHECK: util.return %[[JOIN]]
+ util.return %result_timepoint : !stream.timepoint
}
// -----
@@ -257,7 +257,7 @@
// CHECK-LABEL: @concurrentRegions
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @concurrentRegions(%operand: !stream.resource<transient>, %size: index) {
+util.func public @concurrentRegions(%operand: !stream.resource<transient>, %size: index) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c254_i32 = arith.constant 254 : i32
@@ -281,14 +281,14 @@
util.optimization_barrier %results#0 : !stream.resource<transient>
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %results#1 : !stream.resource<transient>
- return
+ util.return
}
// -----
// CHECK-LABEL: @applyAsyncSplatOp
// CHECK-SAME: (%[[SIZE:.+]]: index)
-func.func @applyAsyncSplatOp(%size: index) {
+util.func public @applyAsyncSplatOp(%size: index) {
%c255_i32 = arith.constant 255 : i32
// CHECK: %[[ALLOCA:.+]], %[[ALLOCA_TIMEPOINT:.+]] = stream.resource.alloca uninitialized : !stream.resource<transient>{%[[SIZE]]}
// CHECK: stream.cmd.execute await(%[[ALLOCA_TIMEPOINT]])
@@ -300,14 +300,14 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
// CHECK-LABEL: @applyAsyncCloneOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @applyAsyncCloneOp(%operand: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncCloneOp(%operand: !stream.resource<transient>, %size: index) {
// CHECK: %[[ALLOCA:.+]], %[[ALLOCA_TIMEPOINT:.+]] = stream.resource.alloca uninitialized : !stream.resource<transient>{%[[SIZE]]}
// CHECK: stream.cmd.execute await(%[[ALLOCA_TIMEPOINT]])
// CHECK-SAME: with(%[[OPERAND]] as %[[OPERAND_CAPTURE:.+]]: !stream.resource<transient>{%[[SIZE]]},
@@ -320,7 +320,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -330,7 +330,7 @@
// CHECK-LABEL: @applyAsyncSliceOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @applyAsyncSliceOp(%operand: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncSliceOp(%operand: !stream.resource<transient>, %size: index) {
%c16 = arith.constant 16 : index
%c128 = arith.constant 128 : index
%c144 = arith.constant 144 : index
@@ -346,14 +346,14 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
// CHECK-LABEL: @applyAsyncFillOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @applyAsyncFillOp(%operand: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncFillOp(%operand: !stream.resource<transient>, %size: index) {
%c16 = arith.constant 16 : index
%c128 = arith.constant 128 : index
%c144 = arith.constant 144 : index
@@ -366,7 +366,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -378,7 +378,7 @@
// CHECK-SAME: (%[[UPDATE:.+]]: !stream.resource<external>,
// CHECK-SAME: %[[OPERAND:.+]]: !stream.resource<transient>,
// CHECK-SAME: %[[SIZE:.+]]: index)
-func.func @applyAsyncUpdateOp(%update: !stream.resource<external>, %operand: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncUpdateOp(%update: !stream.resource<external>, %operand: !stream.resource<transient>, %size: index) {
%c16 = arith.constant 16 : index
%c128 = arith.constant 128 : index
%c144 = arith.constant 144 : index
@@ -393,7 +393,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -402,7 +402,7 @@
// CHECK-SAME: (%[[SOURCE:.+]]: !stream.resource<external>,
// CHECK-SAME: %[[TARGET:.+]]: !stream.resource<transient>,
// CHECK-SAME: %[[SIZE:.+]]: index)
-func.func @applyAsyncCopyOp(%source: !stream.resource<external>, %target: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncCopyOp(%source: !stream.resource<external>, %target: !stream.resource<transient>, %size: index) {
%c16 = arith.constant 16 : index
%c128 = arith.constant 128 : index
%c144 = arith.constant 144 : index
@@ -417,7 +417,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[TARGET]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -430,7 +430,7 @@
// CHECK-SAME: (%[[SOURCE:.+]]: !stream.resource<external>,
// CHECK-SAME: %[[TARGET:.+]]: !stream.resource<transient>,
// CHECK-SAME: %[[SIZE:.+]]: index)
-func.func @applyConcurrentAsyncCopyOp(%source: !stream.resource<external>, %target: !stream.resource<transient>, %size: index) {
+util.func public @applyConcurrentAsyncCopyOp(%source: !stream.resource<external>, %target: !stream.resource<transient>, %size: index) {
%c0 = arith.constant 0 : index
%c16 = arith.constant 16 : index
%c128 = arith.constant 128 : index
@@ -453,7 +453,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[TARGET]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -465,7 +465,7 @@
// CHECK-SAME: %[[SEND:.+]]: !stream.resource<external>, %[[SEND_SIZE:[a-z0-9]+]]: index,
// CHECK-SAME: %[[RECV:.+]]: !stream.resource<transient>, %[[RECV_SIZE:[a-z0-9]+]]: index,
// CHECK-SAME: %[[COUNT:[a-z0-9]+]]: index)
-func.func @applyAsyncCollectiveOpOutOfPlace(%channel: !stream.channel, %send: !stream.resource<external>, %send_size: index, %recv: !stream.resource<transient>, %recv_size: index, %count: index) {
+util.func public @applyAsyncCollectiveOpOutOfPlace(%channel: !stream.channel, %send: !stream.resource<external>, %send_size: index, %recv: !stream.resource<transient>, %recv_size: index, %count: index) {
%c0 = arith.constant 0 : index
// CHECK: stream.cmd.execute
// CHECK-SAME: with(%[[SEND]] as %[[SEND_CAPTURE:.+]]: !stream.resource<external>{%[[SEND_SIZE]]},
@@ -482,7 +482,7 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[RECV]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -491,7 +491,7 @@
// CHECK-LABEL: @applyAsyncTransferOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index)
-func.func @applyAsyncTransferOp(%operand: !stream.resource<transient>, %size: index) {
+util.func public @applyAsyncTransferOp(%operand: !stream.resource<transient>, %size: index) {
// CHECK: %[[ALLOCA:.+]], %[[ALLOCA_TIMEPOINT:.+]] = stream.resource.alloca uninitialized : !stream.resource<transient>{%[[SIZE]]}
// CHECK: stream.cmd.execute await(%[[ALLOCA_TIMEPOINT]])
// CHECK-SAME: with(%[[OPERAND]] as %[[OPERAND_CAPTURE:.+]]: !stream.resource<transient>{%[[SIZE]]},
@@ -504,14 +504,14 @@
} => !stream.timepoint
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
// CHECK-LABEL: @applyAsyncDispatchOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[END:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @applyAsyncDispatchOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
+util.func public @applyAsyncDispatchOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
@@ -533,7 +533,7 @@
util.optimization_barrier %results#0 : !stream.resource<transient>
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %results#1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -543,7 +543,7 @@
// CHECK-LABEL: @applyAsyncDispatchUnusedOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[END:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @applyAsyncDispatchUnusedOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
+util.func public @applyAsyncDispatchUnusedOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
@@ -572,7 +572,7 @@
util.optimization_barrier %result_timepoint : !stream.timepoint
// CHECK: util.optimization_barrier %[[OPERAND]]
util.optimization_barrier %result : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -582,7 +582,7 @@
// CHECK-LABEL: @applyAsyncCallOp
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<transient>, %[[SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[END:.+]]: index, %[[LENGTH:.+]]: index)
-func.func @applyAsyncCallOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
+util.func public @applyAsyncCallOp(%operand: !stream.resource<transient>, %size: index, %offset: index, %end: index, %length: index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
@@ -602,7 +602,7 @@
util.optimization_barrier %results#0 : !stream.resource<transient>
// CHECK: util.optimization_barrier %[[ALLOCA]]
util.optimization_barrier %results#1 : !stream.resource<transient>
- return
+ util.return
}
// -----
@@ -612,15 +612,15 @@
// CHECK-LABEL: @asyncLoadStore
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<staging>,
// CHECK-SAME: %[[SIZE:.+]]: index)
-func.func @asyncLoadStore(%operand: !stream.resource<staging>, %size: index) -> f32 {
+util.func public @asyncLoadStore(%operand: !stream.resource<staging>, %size: index) -> f32 {
%c0 = arith.constant 0 : index
%cst = arith.constant 5.4 : f32
// CHECK: stream.resource.store %cst, %[[OPERAND]][%c0] : f32 -> !stream.resource<staging>{%[[SIZE]]}
%0 = stream.async.store %cst, %operand[%c0] : f32 -> %operand as !stream.resource<staging>{%size}
// CHECK: %[[RESULT:.+]] = stream.resource.load %[[OPERAND]][%c0] : !stream.resource<staging>{%[[SIZE]]} -> f32
%1 = stream.async.load %0[%c0] : !stream.resource<staging>{%size} -> f32
- // CHECK: return %[[RESULT]]
- return %1 : f32
+ // CHECK: util.return %[[RESULT]]
+ util.return %1 : f32
}
// -----
@@ -630,7 +630,7 @@
// CHECK-LABEL: @scfFor
// CHECK-SAME: (%[[OPERAND:.+]]: !stream.resource<staging>,
// CHECK-SAME: %[[SIZE:.+]]: index)
-func.func @scfFor(%operand: !stream.resource<staging>, %size: index) -> f32 {
+util.func public @scfFor(%operand: !stream.resource<staging>, %size: index) -> f32 {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
@@ -650,6 +650,6 @@
scf.yield %2 : f32
}
- // CHECK: return %[[FOR]]
- return %sum : f32
+ // CHECK: util.return %[[FOR]]
+ util.return %sum : f32
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_concurrency.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_concurrency.mlir
index db42945..14396ce 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_concurrency.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_concurrency.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-stream-schedule-concurrency))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module( util.func(iree-stream-schedule-concurrency))" %s | FileCheck %s
// Tests that when favor=min-peak-memory we assume ops are in an order that
// reduces live memory ranges and only optimistically put them in concurrency
@@ -6,7 +6,7 @@
// CHECK-LABEL: @partitioningForMinPeakMemory
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>, %[[ARG1:.+]]: !stream.resource<external>)
-func.func @partitioningForMinPeakMemory(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external>
+util.func public @partitioningForMinPeakMemory(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external>
attributes {stream.partitioning = #stream.partitioning_config<"min-peak-memory">} {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -45,7 +45,7 @@
stream.yield %5 : !stream.resource<external>{%c20}
} => !stream.timepoint
%0 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c20}
- return %0 : !stream.resource<external>
+ util.return %0 : !stream.resource<external>
}
// -----
@@ -55,7 +55,7 @@
// CHECK-LABEL: @partitioningForMaxConcurrency
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>, %[[ARG1:.+]]: !stream.resource<external>)
-func.func @partitioningForMaxConcurrency(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external>
+util.func public @partitioningForMaxConcurrency(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external>
attributes {stream.partitioning = #stream.partitioning_config<"max-concurrency">} {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -98,7 +98,7 @@
stream.yield %5 : !stream.resource<external>{%c20}
} => !stream.timepoint
%0 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c20}
- return %0 : !stream.resource<external>
+ util.return %0 : !stream.resource<external>
}
// -----
@@ -109,7 +109,7 @@
// CHECK-LABEL: @keepTiedOpsSeparate
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>)
-func.func @keepTiedOpsSeparate(%arg0: !stream.resource<external>) -> (!stream.resource<external>, !stream.resource<external>) {
+util.func public @keepTiedOpsSeparate(%arg0: !stream.resource<external>) -> (!stream.resource<external>, !stream.resource<external>) {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
// CHECK: stream.async.execute
@@ -123,7 +123,7 @@
// CHECK-NEXT: stream.yield
stream.yield %1, %2 : !stream.resource<external>{%c4}, !stream.resource<external>{%c4}
} => !stream.timepoint
- return %results#0, %results#1 : !stream.resource<external>, !stream.resource<external>
+ util.return %results#0, %results#1 : !stream.resource<external>, !stream.resource<external>
}
// -----
@@ -138,7 +138,7 @@
// CHECK-SAME: %[[SEND0:.+]]: !stream.resource<external>, %[[SEND0_SIZE:[a-z0-9]+]]: index,
// CHECK-SAME: %[[SEND1:.+]]: !stream.resource<transient>, %[[SEND1_SIZE:[a-z0-9]+]]: index,
// CHECK-SAME: %[[RECV_SIZE:[a-z0-9]+]]: index, %[[COUNT:[a-z0-9]+]]: index)
-func.func @groupCollectiveOps(%channel: !stream.channel, %send0: !stream.resource<external>, %send0_size: index, %send1: !stream.resource<transient>, %send1_size: index, %recv_size: index, %count: index) {
+util.func public @groupCollectiveOps(%channel: !stream.channel, %send0: !stream.resource<external>, %send0_size: index, %send1: !stream.resource<transient>, %send1_size: index, %recv_size: index, %count: index) {
%c0 = arith.constant 0 : index
// CHECK: stream.async.execute
%result:2, %result_timepoint = stream.async.execute
@@ -184,5 +184,5 @@
} => !stream.timepoint
util.optimization_barrier %result#0 : !stream.resource<transient>
util.optimization_barrier %result#1 : !stream.resource<transient>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_execution.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_execution.mlir
index fab2dfb..357a936 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_execution.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_execution.mlir
@@ -1,10 +1,10 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-stream-schedule-execution))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module( util.func(iree-stream-schedule-execution))" %s | FileCheck %s
// Tests basic partitioning of multiple ops.
// CHECK-LABEL: @partitioning
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>, %[[ARG1:.+]]: !stream.resource<external>)
-func.func @partitioning(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external> {
+util.func public @partitioning(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -28,8 +28,8 @@
// CHECK-NEXT: stream.yield %[[DISPATCH2]] : !stream.resource<external>{%c20}
// CHECK-NEXT: } => !stream.timepoint
// CHECK-NEXT: %[[READY:.+]] = stream.timepoint.await %[[TIMEPOINT]] => %[[RESULT]] : !stream.resource<external>{%c20}
- // CHECK-NEXT: return %[[READY]]
- return %6 : !stream.resource<external>
+ // CHECK-NEXT: util.return %[[READY]]
+ util.return %6 : !stream.resource<external>
}
// -----
@@ -40,7 +40,7 @@
// CHECK-LABEL: @partitioningWithAffinities
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>)
-func.func @partitioningWithAffinities(%arg0: !stream.resource<external>) -> !stream.resource<external> {
+util.func public @partitioningWithAffinities(%arg0: !stream.resource<external>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -74,8 +74,8 @@
// CHECK-NEXT: %[[READY:.+]] = stream.timepoint.await
// CHECK-SAME: on(#hal.affinity.queue<[1]>)
// CHECK-SAME: %[[TIMEPOINT1]] => %[[RESULT]] : !stream.resource<external>{%c20}
- // CHECK-NEXT: return %[[READY]]
- return %dispatch2 : !stream.resource<external>
+ // CHECK-NEXT: util.return %[[READY]]
+ util.return %dispatch2 : !stream.resource<external>
}
// -----
@@ -86,7 +86,7 @@
// CHECK-LABEL: @partitioningWithConcurrentAffinities
// CHECK-SAME: (%[[ARG0:.+]]: !stream.resource<external>)
-func.func @partitioningWithConcurrentAffinities(%arg0: !stream.resource<external>) -> !stream.resource<external> {
+util.func public @partitioningWithConcurrentAffinities(%arg0: !stream.resource<external>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -128,8 +128,8 @@
// CHECK-NEXT: %[[READY:.+]] = stream.timepoint.await
// CHECK-SAME: on(#hal.affinity.queue<[2]>)
// CHECK-SAME: %[[TIMEPOINT2]] => %[[RESULT]] : !stream.resource<external>{%c20}
- // CHECK-NEXT: return %[[READY]]
- return %dispatch2 : !stream.resource<external>
+ // CHECK-NEXT: util.return %[[READY]]
+ util.return %dispatch2 : !stream.resource<external>
}
// -----
@@ -139,7 +139,7 @@
// happen in-place on the splat and we expect the execution regions to be tied.
// CHECK-LABEL: @partitionWithinBlocks
-func.func @partitionWithinBlocks(%cond: i1) -> !stream.resource<transient> {
+util.func public @partitionWithinBlocks(%cond: i1) -> !stream.resource<transient> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c1280 = arith.constant 1280 : index
@@ -156,8 +156,8 @@
// CHECK: stream.async.dispatch @ex::@dispatch_0[%c1, %c1, %c1](%[[BB1_SPLAT]][{{.+}}]) : (!stream.resource<transient>{%c1280}) -> %[[BB1_SPLAT]]{%c1280}
%3 = stream.async.dispatch @ex::@dispatch_0[%c1, %c1, %c1](%splat[%c0 to %c1280 for %c1280]) : (!stream.resource<transient>{%c1280}) -> %splat{%c1280}
// CHECK: %[[BB1_READY:.+]] = stream.timepoint.await %[[BB1_TIMEPOINT]] => %[[BB1_RESULT]]
- // CHECK: return %[[BB1_READY]]
- return %3 : !stream.resource<transient>
+ // CHECK: util.return %[[BB1_READY]]
+ util.return %3 : !stream.resource<transient>
^bb2:
// CHECK: %[[BB2_RESULT:.+]], %[[BB2_TIMEPOINT:.+]] = stream.async.execute await(%[[SPLAT_TIMEPOINT]]) =>
// CHECK-SAME: with(%[[SPLAT]] as %[[BB2_SPLAT:.+]]: !stream.resource<transient>{%c1280})
@@ -165,8 +165,8 @@
// CHECK: stream.async.dispatch @ex::@dispatch_1[%c1, %c1, %c1](%[[BB2_SPLAT]][{{.+}}]) : (!stream.resource<transient>{%c1280}) -> %[[BB2_SPLAT]]{%c1280}
%4 = stream.async.dispatch @ex::@dispatch_1[%c1, %c1, %c1](%splat[%c0 to %c1280 for %c1280]) : (!stream.resource<transient>{%c1280}) -> %splat{%c1280}
// CHECK: %[[BB2_READY:.+]] = stream.timepoint.await %[[BB2_TIMEPOINT]] => %[[BB2_RESULT]]
- // CHECK: return %[[BB2_READY]]
- return %4 : !stream.resource<transient>
+ // CHECK: util.return %[[BB2_READY]]
+ util.return %4 : !stream.resource<transient>
}
// -----
@@ -176,7 +176,7 @@
// single block and break the assumption that one block == one partition.
// CHECK-LABEL: @deviceHostDevice
-func.func @deviceHostDevice() -> !stream.resource<transient> {
+util.func public @deviceHostDevice() -> !stream.resource<transient> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c123_i8 = arith.constant 123 : i8
@@ -201,8 +201,8 @@
%5 = stream.async.transfer %4 : !stream.resource<staging>{%c1} -> !stream.resource<transient>{%c1}
// CHECK-NEXT: stream.yield %[[TRANSFER_H2D]]
// CHECK: %[[READY_H2D:.+]] = stream.timepoint.await %[[TIMEPOINT_H2D]] => %[[RESULT_H2D]] : !stream.resource<transient>{%c1}
- // CHECK: return %[[READY_H2D]]
- return %5 : !stream.resource<transient>
+ // CHECK: util.return %[[READY_H2D]]
+ util.return %5 : !stream.resource<transient>
}
// -----
@@ -210,7 +210,7 @@
// Tests that partitioning does not hoist ops across cf.asserts.
// CHECK-LABEL: @dontHoistPastAsserts
-func.func @dontHoistPastAsserts(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external> {
+util.func public @dontHoistPastAsserts(%arg0: !stream.resource<external>, %arg1: !stream.resource<external>) -> !stream.resource<external> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -242,7 +242,7 @@
// CHECK-NEXT: stream.async.dispatch @ex::@dispatch_2
%6 = stream.async.dispatch @ex::@dispatch_2[%c1, %c1, %c1](%3[%c0 to %c1280 for %c1280], %5[%c0 to %c20 for %c20]) : (!stream.resource<transient>{%c1280}, !stream.resource<transient>{%c20}) -> !stream.resource<external>{%c20}
- return %6 : !stream.resource<external>
+ util.return %6 : !stream.resource<external>
}
// -----
@@ -252,7 +252,7 @@
// the cloned values will be exported to provide the value.
// CHECK-LABEL: @cloneAcrossPartitions
-func.func @cloneAcrossPartitions(%cond: i1) -> (!stream.resource<external>, !stream.resource<transient>) {
+util.func public @cloneAcrossPartitions(%cond: i1) -> (!stream.resource<external>, !stream.resource<transient>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c123_i8 = arith.constant 123 : i8
@@ -281,8 +281,8 @@
%result = stream.async.transfer %dispatch1 : !stream.resource<transient>{%c1} -> !stream.resource<external>{%c1}
// CHECK: %[[PARTITION1:.+]] = stream.timepoint.await
- // CHECK: return %[[PARTITION1]], %[[PARTITION0]]#1
- return %result, %splat : !stream.resource<external>, !stream.resource<transient>
+ // CHECK: util.return %[[PARTITION1]], %[[PARTITION0]]#1
+ util.return %result, %splat : !stream.resource<external>, !stream.resource<transient>
}
// -----
@@ -293,7 +293,7 @@
// tracking both the host and device hazards correctly.
// CHECK-LABEL: @deviceHostDeviceCrossing
-func.func @deviceHostDeviceCrossing(%arg0: i1) -> !stream.resource<transient> {
+util.func public @deviceHostDeviceCrossing(%arg0: i1) -> !stream.resource<transient> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -314,8 +314,8 @@
// CHECK-NEXT: stream.async.dispatch @ex::@dispatch2
%4 = stream.async.dispatch @ex::@dispatch2[%c1, %c1, %c1](%1[%c0 to %c128 for %c128], %3[%c0 to %c128 for %c128]) : (!stream.resource<transient>{%c128}, !stream.resource<transient>{%c128}) -> !stream.resource<transient>{%c128}
- // CHECK: return
- return %4 : !stream.resource<transient>
+ // CHECK: util.return
+ util.return %4 : !stream.resource<transient>
}
// -----
@@ -325,13 +325,13 @@
stream.async.func private @inplaceExtern(%arg0: !stream.resource<*>, %arg1: index) -> %arg0
// CHECK-LABEL: @inplaceCall
-func.func @inplaceCall(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> (!stream.resource<*>, index) {
+util.func public @inplaceCall(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> (!stream.resource<*>, index) {
%c0 = arith.constant 0 : index
// CHECK: stream.async.execute
// CHECK-NEXT: stream.async.call
%0 = stream.async.call @inplaceExtern(%arg0[%c0 to %arg1 for %arg1], %arg2) : (!stream.resource<*>{%arg1}, index) -> %arg0{%arg1}
// CHECK: stream.timepoint.await
- return %0, %arg1 : !stream.resource<*>, index
+ util.return %0, %arg1 : !stream.resource<*>, index
}
// -----
@@ -341,7 +341,7 @@
stream.async.func private @inplaceExtern(%arg0: !stream.resource<*>, %arg1: index) -> %arg0
// CHECK-LABEL: @scfRecurse
-func.func @scfRecurse(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> (!stream.resource<*>, index) {
+util.func public @scfRecurse(%arg0: !stream.resource<*>, %arg1: index, %arg2: index) -> (!stream.resource<*>, index) {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
@@ -354,5 +354,5 @@
// CHECK: stream.timepoint.await
scf.yield %0 : !stream.resource<*>
}
- return %sum, %arg1 : !stream.resource<*>, index
+ util.return %sum, %arg1 : !stream.resource<*>, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
index f8edc0f..4f5f931 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
@@ -8,8 +8,8 @@
stream.executable private @specializeEx {
stream.executable.export public @dispatch
builtin.module {
- // CHECK: func.func @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A:.+]]: i32, %[[SITE:.+]]: index)
- func.func @dispatch(%binding: !stream.binding, %a: i32, %b: index, %c: i1, %d: i1) {
+ // CHECK: util.func public @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A:.+]]: i32, %[[SITE:.+]]: index)
+ util.func public @dispatch(%binding: !stream.binding, %a: i32, %b: index, %c: i1, %d: i1) {
// CHECK-NEXT: %[[LUT_I32:.+]] = arith.constant dense<[
// CHECK-SAME: [20],
// CHECK-SAME: [40]
@@ -35,12 +35,12 @@
util.optimization_barrier %c : i1
// CHECK-NEXT: util.optimization_barrier %[[D]] : i1
util.optimization_barrier %d : i1
- return
+ util.return
}
}
}
-// CHECK: func.func @specialize(%[[A:.+]]: i32)
-func.func @specialize(%a: i32) {
+// CHECK: util.func public @specialize(%[[A:.+]]: i32)
+util.func public @specialize(%a: i32) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c20 = arith.constant 20 : index
@@ -58,5 +58,5 @@
rw %capture[%c0 for %c20] : !stream.resource<transient>{%c20}
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/verify_async_access_ranges.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/verify_async_access_ranges.mlir
index 1a4d361..49ae711 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/verify_async_access_ranges.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/verify_async_access_ranges.mlir
@@ -3,21 +3,21 @@
// Tests that statically-known valid ranges pass verification.
// CHECK: @inRangeCopy
-func.func @inRangeCopy(%source: !stream.resource<*>, %target: !stream.resource<*>) -> !stream.resource<*> {
+util.func public @inRangeCopy(%source: !stream.resource<*>, %target: !stream.resource<*>) -> !stream.resource<*> {
%source_size = arith.constant 256 : index
%target_size = arith.constant 256 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
// CHECK: = stream.async.copy
%0 = stream.async.copy %source[%c128 to %c256], %target[%c128 to %c256], %c128 : !stream.resource<*>{%source_size} -> %target as !stream.resource<*>{%target_size}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
// Tests that statically-known invalid ranges emit errors.
// For more useful reporting we report all errors on an op so this expects 2.
-func.func @outOfRangeCopy(%source: !stream.resource<*>, %target: !stream.resource<*>) -> !stream.resource<*> {
+util.func public @outOfRangeCopy(%source: !stream.resource<*>, %target: !stream.resource<*>) -> !stream.resource<*> {
%source_size = arith.constant 256 : index
%target_size = arith.constant 255 : index // NOTE: too small!
%c128 = arith.constant 128 : index
@@ -27,7 +27,7 @@
// expected-error @+2 {{invalid Write access range [256 to 512 for 128] of resource %arg1 with size 255}}
// expected-error @+1 {{invalid Write access range [256 to 512 for 128] of resource %0 with size 255}}
%0 = stream.async.copy %source[%c128 to %c512], %target[%c256 to %c512], %c128 : !stream.resource<*>{%source_size} -> %target as !stream.resource<*>{%target_size}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
// -----
@@ -37,10 +37,10 @@
// and this pass could verify the conditions (size of A < size of B, etc).
// CHECK-LABEL: @dynamicSizes
-func.func @dynamicSizes(%source: !stream.resource<*>, %source_size: index, %target: !stream.resource<*>, %target_size: index) -> !stream.resource<*> {
+util.func public @dynamicSizes(%source: !stream.resource<*>, %source_size: index, %target: !stream.resource<*>, %target_size: index) -> !stream.resource<*> {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: = stream.async.copy
%0 = stream.async.copy %source[%c0 to %c128], %target[%c0 to %c128], %c128 : !stream.resource<*>{%source_size} -> %target as !stream.resource<*>{%target_size}
- return %0 : !stream.resource<*>
+ util.return %0 : !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
index 041e6da..a07a41f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
@@ -94,7 +94,79 @@
}
};
-struct ConvertFuncOp : public OpConversionPattern<mlir::func::FuncOp> {
+struct ConvertFuncOp : public OpConversionPattern<IREE::Util::FuncOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::FuncOp funcOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto &typeConverter = *getTypeConverter();
+
+ // Convert the input signature types.
+ // TODO(benvanik): dynamic shapes by passing in tensor dynamic dims.
+ auto originalType = funcOp.getFunctionType();
+ TypeConverter::SignatureConversion newSignature(
+ originalType.getNumInputs());
+ for (auto argType : llvm::enumerate(originalType.getInputs())) {
+ if (failed(typeConverter.convertSignatureArg(
+ argType.index(), argType.value(), newSignature))) {
+ return rewriter.notifyMatchFailure(funcOp,
+ "failed to convert arg type");
+ }
+ }
+ SmallVector<Type> newResultTypes;
+ if (failed(typeConverter.convertTypes(originalType.getResults(),
+ newResultTypes))) {
+ return rewriter.notifyMatchFailure(funcOp,
+ "failed to convert result type");
+ }
+
+ // Replace function.
+ auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
+ newFuncOp.getBlocks().clear();
+ rewriter.inlineRegionBefore(funcOp.getFunctionBody(),
+ newFuncOp.getFunctionBody(), newFuncOp.end());
+ newFuncOp.setType(rewriter.getFunctionType(newSignature.getConvertedTypes(),
+ newResultTypes));
+ if (failed(rewriter.convertRegionTypes(&newFuncOp.getFunctionBody(),
+ typeConverter, &newSignature))) {
+ return rewriter.notifyMatchFailure(funcOp,
+ "failed to convert region types");
+ }
+ rewriter.eraseOp(funcOp);
+ return success();
+ }
+};
+
+struct ConvertCallOp : public OpConversionPattern<IREE::Util::CallOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::CallOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ SmallVector<Type> resultTypes;
+ if (failed(getTypeConverter()->convertTypes(op.getResultTypes(),
+ resultTypes))) {
+ return rewriter.notifyMatchFailure(op, "unable to convert result types");
+ }
+ auto newOp = rewriter.replaceOpWithNewOp<IREE::Util::CallOp>(
+ op, resultTypes, op.getCallee(), adaptor.getOperands(),
+ adaptor.getTiedOperandsAttr());
+ newOp->setDialectAttrs(op->getDialectAttrs());
+ return success();
+ }
+};
+
+struct ConvertReturnOp : public OpConversionPattern<IREE::Util::ReturnOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(IREE::Util::ReturnOp returnOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ rewriter.replaceOpWithNewOp<IREE::Util::ReturnOp>(returnOp,
+ adaptor.getOperands());
+ return success();
+ }
+};
+
+struct ConvertFuncFuncOp : public OpConversionPattern<mlir::func::FuncOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(mlir::func::FuncOp funcOp, OpAdaptor adaptor,
@@ -137,7 +209,7 @@
}
};
-struct ConvertCallOp : public OpConversionPattern<mlir::func::CallOp> {
+struct ConvertFuncCallOp : public OpConversionPattern<mlir::func::CallOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(mlir::func::CallOp op, OpAdaptor adaptor,
@@ -153,7 +225,7 @@
}
};
-struct ConvertReturnOp : public OpConversionPattern<mlir::func::ReturnOp> {
+struct ConvertFuncReturnOp : public OpConversionPattern<mlir::func::ReturnOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(mlir::func::ReturnOp returnOp, OpAdaptor adaptor,
@@ -259,28 +331,42 @@
// We need to rewrite certain types on operands/results so use the default
// dynamic legality checker to force any ops using such types to run through
// our patterns.
+
conversionTarget.addDynamicallyLegalOp<IREE::Util::InitializerOp>(
[&](IREE::Util::InitializerOp op) {
return typeConverter.isLegal(&op.getBody());
});
- conversionTarget.addDynamicallyLegalOp<mlir::func::FuncOp>(
- [&](mlir::func::FuncOp op) {
+ conversionTarget.addDynamicallyLegalOp<IREE::Util::FuncOp>(
+ [&](IREE::Util::FuncOp op) {
return typeConverter.isSignatureLegal(op.getFunctionType()) &&
typeConverter.isLegal(&op.getBody());
});
+ addGenericLegalOp<IREE::Util::CallOp>(conversionTarget, typeConverter);
+ addGenericLegalOp<IREE::Util::ReturnOp>(conversionTarget, typeConverter);
+ patterns.insert<ConvertInitializerOp, ConvertFuncOp, ConvertCallOp,
+ ConvertReturnOp>(typeConverter, context);
+
+ conversionTarget.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) {
+ return typeConverter.isSignatureLegal(op.getFunctionType()) &&
+ typeConverter.isLegal(&op.getBody());
+ });
addGenericLegalOp<func::CallOp>(conversionTarget, typeConverter);
addGenericLegalOp<func::ReturnOp>(conversionTarget, typeConverter);
+ patterns.insert<ConvertFuncFuncOp, ConvertFuncCallOp, ConvertFuncReturnOp>(
+ typeConverter, context);
+
addGenericLegalOp<cf::BranchOp>(conversionTarget, typeConverter);
addGenericLegalOp<cf::CondBranchOp>(conversionTarget, typeConverter);
addGenericLegalOp<cf::SwitchOp>(conversionTarget, typeConverter);
+ patterns.insert<ConvertBranchOp, ConvertCondBranchOp, ConvertSwitchOp>(
+ typeConverter, context);
+
addGenericLegalOp<arith::SelectOp>(conversionTarget, typeConverter);
+ patterns.insert<ConvertSelectOp>(typeConverter, context);
+
addGenericLegalOp<scf::IfOp>(conversionTarget, typeConverter);
addGenericLegalOp<scf::YieldOp>(conversionTarget, typeConverter);
- patterns
- .insert<ConvertInitializerOp, ConvertFuncOp, ConvertCallOp,
- ConvertReturnOp, ConvertBranchOp, ConvertCondBranchOp,
- ConvertSwitchOp, ConvertSelectOp, ConvertIfOp, ConvertYieldOp>(
- typeConverter, context);
+ patterns.insert<ConvertIfOp, ConvertYieldOp>(typeConverter, context);
}
} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/test/memref_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/test/memref_ops.mlir
index deb6cbd..745037b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/test/memref_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/MemRefToUtil/test/memref_ops.mlir
@@ -5,89 +5,89 @@
// -----
// Must be rank-0 or rank-1.
// expected-error @-3 {{conversion to util failed}}
-func.func @verify_invalid_rank_2(%buffer: memref<4x2xf32>, %idx: index) -> f32{
+util.func @verify_invalid_rank_2(%buffer: memref<4x2xf32>, %idx: index) -> f32{
// expected-error @below {{failed to legalize operation 'memref.load'}}
%0 = memref.load %buffer[%idx, %idx] : memref<4x2xf32>
- return %0 : f32
+ util.return %0 : f32
}
// -----
// Must have an identity map.
// expected-error @-3 {{conversion to util failed}}
#map = affine_map<(d0)[s0] -> (d0 * s0)>
-func.func @verify_invalid_non_identity_map(%buffer: memref<4xf32, #map>, %idx: index) -> f32 {
+util.func @verify_invalid_non_identity_map(%buffer: memref<4xf32, #map>, %idx: index) -> f32 {
// expected-error @below {{failed to legalize operation 'memref.load'}}
%0 = memref.load %buffer[%idx] : memref<4xf32, #map>
- return %0 : f32
+ util.return %0 : f32
}
// -----
// CHECK-LABEL: @assume_alignment
-func.func @assume_alignment(%buffer: memref<?xf32>) {
+util.func @assume_alignment(%buffer: memref<?xf32>) {
// CHECK-NOT: assume_alignment
memref.assume_alignment %buffer, 64 : memref<?xf32>
- func.return
+ util.return
}
// -----
// CHECK-LABEL: @cast
-func.func @cast(%buffer: memref<?xf32>) -> memref<5xf32> {
+util.func @cast(%buffer: memref<?xf32>) -> memref<5xf32> {
// CHECK-NOT: memref.cast
%0 = memref.cast %buffer : memref<?xf32> to memref<5xf32>
- // CHECK: return %arg0 : !util.buffer
- func.return %0 : memref<5xf32>
+ // CHECK: util.return %arg0 : !util.buffer
+ util.return %0 : memref<5xf32>
}
// -----
// CHECK-LABEL: @alloca() -> !util.buffer
-func.func @alloca() -> memref<16xi32> {
+util.func @alloca() -> memref<16xi32> {
// CHECK: %[[ALLOCATION_SIZE:.+]] = arith.constant 64 : index
// CHECK: %[[BUFFER:.+]] = util.buffer.alloc uninitialized : !util.buffer{%[[ALLOCATION_SIZE]]}
%0 = memref.alloca() : memref<16xi32>
- // CHECK: return %[[BUFFER]]
- return %0 : memref<16xi32>
+ // CHECK: util.return %[[BUFFER]]
+ util.return %0 : memref<16xi32>
}
// -----
// CHECK-LABEL: @alloca_dynamic_size
// CHECK-SAME: (%[[LENGTH:.+]]: index)
-func.func @alloca_dynamic_size(%length : index) -> memref<?xi32> {
+util.func @alloca_dynamic_size(%length : index) -> memref<?xi32> {
// CHECK: %[[ELEM_SIZE:.+]] = arith.constant 4 : index
// CHECK: %[[ALLOCATION_SIZE:.+]] = arith.muli %[[LENGTH]], %[[ELEM_SIZE]] : index
// CHECK: %[[BUFFER:.+]] = util.buffer.alloc uninitialized : !util.buffer{%[[ALLOCATION_SIZE]]}
%0 = memref.alloca(%length) : memref<?xi32>
- // CHECK: return %[[BUFFER]]
- return %0 : memref<?xi32>
+ // CHECK: util.return %[[BUFFER]]
+ util.return %0 : memref<?xi32>
}
// -----
// CHECK-LABEL: @alloc_i16
// CHECK-SAME: (%[[IDX0:.+]]: index) -> !util.buffer {
-func.func @alloc_i16(%idx0: index) -> memref<4xi16> {
+util.func @alloc_i16(%idx0: index) -> memref<4xi16> {
// CHECK: %[[C8:.*]] = arith.constant 8 : index
// CHECK: %[[BUFFER:.*]] = util.buffer.alloc uninitialized : !util.buffer{%[[C8]]}
%0 = memref.alloca() : memref<4xi16>
- // CHECK: return %[[BUFFER]]
- return %0 : memref<4xi16>
+ // CHECK: util.return %[[BUFFER]]
+ util.return %0 : memref<4xi16>
}
// -----
// CHECK-LABEL: @alloc_index
// CHECK-SAME: (%[[IDX0:.+]]: index) -> !util.buffer {
-func.func @alloc_index(%idx0: index) -> memref<4xindex> {
+util.func @alloc_index(%idx0: index) -> memref<4xindex> {
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[SIZEOF:.*]] = util.sizeof index
// CHECK: %[[SZ:.*]] = arith.muli %[[SIZEOF]], %[[C4]]
// CHECK: %[[BUFFER:.*]] = util.buffer.alloc uninitialized : !util.buffer{%[[SZ]]}
%0 = memref.alloca() : memref<4xindex>
- // CHECK: return %[[BUFFER]]
- return %0 : memref<4xindex>
+ // CHECK: util.return %[[BUFFER]]
+ util.return %0 : memref<4xindex>
}
// -----
// CHECK-LABEL: @load_store_f32
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[IDX0:.+]]: index, %[[IDX1:.+]]: index) -> f32 {
-func.func @load_store_f32(%buffer: memref<?xf32>, %idx0: index, %idx1: index) -> f32 {
+util.func @load_store_f32(%buffer: memref<?xf32>, %idx0: index, %idx1: index) -> f32 {
// CHECK: %[[BUFFER_SIZE:.+]] = util.buffer.size %[[BUFFER]]
// CHECK: %[[IDX0_BYTES:.+]] = arith.muli %[[IDX0]], %c4
// CHECK: %[[VALUE:.+]] = util.buffer.load %[[BUFFER]][%[[IDX0_BYTES]] for %c4] : !util.buffer{%[[BUFFER_SIZE]]} -> f32
@@ -95,8 +95,8 @@
// CHECK: %[[IDX1_BYTES:.+]] = arith.muli %[[IDX1]], %c4
// CHECK: util.buffer.store %[[VALUE]], %[[BUFFER]][%[[IDX1_BYTES]] for %c4] : f32 -> !util.buffer{%[[BUFFER_SIZE]]}
memref.store %0, %buffer[%idx1] : memref<?xf32>
- // CHECK: return %[[VALUE]] : f32
- return %0 : f32
+ // CHECK: util.return %[[VALUE]] : f32
+ util.return %0 : f32
}
// -----
@@ -108,21 +108,21 @@
// CHECK-LABEL: @constant_global_f32
// CHECK-SAME: (%[[IDX:.+]]: index) -> f32 {
-func.func @constant_global_f32(%idx: index) -> f32 {
+util.func @constant_global_f32(%idx: index) -> f32 {
// CHECK: %[[BUFFER:.+]] = util.global.load @__constant_f32 : !util.buffer
%0 = memref.get_global @__constant_f32 : memref<2xf32>
// CHECK: %[[BUFFER_SIZE:.+]] = util.buffer.size %[[BUFFER]]
// CHECK: %[[IDX_BYTES:.+]] = arith.muli %[[IDX]], %c4
// CHECK: %[[VALUE:.+]] = util.buffer.load %[[BUFFER]][%[[IDX_BYTES]] for %c4] : !util.buffer{%[[BUFFER_SIZE]]} -> f32
%1 = memref.load %0[%idx] : memref<2xf32>
- // CHECK: return %[[VALUE]] : f32
- return %1 : f32
+ // CHECK: util.return %[[VALUE]] : f32
+ util.return %1 : f32
}
// -----
// CHECK-LABEL: @load_store_i16
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[IDX0:.+]]: index, %[[IDX1:.+]]: index, %[[VALUE:.+]]: i32) -> i32 {
-func.func @load_store_i16(%buffer: memref<?xi16>, %idx0: index, %idx1: index, %value: i16) -> i16 {
+util.func @load_store_i16(%buffer: memref<?xi16>, %idx0: index, %idx1: index, %value: i16) -> i16 {
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[SZ:.*]] = util.buffer.size %[[BUFFER]]
// CHECK-DAG: %[[OFS0:.*]] = arith.muli %[[IDX0]], %[[C2]] : index
@@ -133,14 +133,14 @@
// CHECK: %[[LD:.*]] = util.buffer.load %[[BUFFER]][%[[OFS1]] for %c2] : !util.buffer{%[[SZ]]} -> i16
// CHECK: %[[UCST1:.*]] = builtin.unrealized_conversion_cast %[[LD]] : i16 to i32
%1 = memref.load %buffer[%idx1] : memref<?xi16>
- // CHECK: return %[[UCST1]]
- return %1 : i16
+ // CHECK: util.return %[[UCST1]]
+ util.return %1 : i16
}
// -----
// CHECK-LABEL: @load_store_index
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[IDX0:.+]]: index, %[[IDX1:.+]]: index, %[[VALUE:.+]]: index) -> index {
-func.func @load_store_index(%buffer: memref<?xindex>, %idx0: index, %idx1: index, %value: index) -> index {
+util.func @load_store_index(%buffer: memref<?xindex>, %idx0: index, %idx1: index, %value: index) -> index {
// CHECK-DAG: %[[SIZEOF:.*]] = util.sizeof index
// CHECK-DAG: %[[SZ:.*]] = util.buffer.size %[[BUFFER]]
// CHECK-DAG: %[[OFS0:.*]] = arith.muli %[[SIZEOF]], %[[IDX0]] : index
@@ -149,30 +149,30 @@
// CHECK: %[[OFS1:.*]] = arith.muli %[[SIZEOF]], %[[IDX1]] : index
// CHECK: %[[LD:.*]] = util.buffer.load %[[BUFFER]][%[[OFS1]] for %[[SIZEOF]]] : !util.buffer{%[[SZ]]} -> index
%1 = memref.load %buffer[%idx1] : memref<?xindex>
- // CHECK: return %[[LD]]
- return %1 : index
+ // CHECK: util.return %[[LD]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @dim_i16
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[IDX0:.+]]: index) -> index {
-func.func @dim_i16(%buffer: memref<?xi16>, %idx0: index) -> index {
+util.func @dim_i16(%buffer: memref<?xi16>, %idx0: index) -> index {
// CHECK: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[SZ:.*]] = util.buffer.size %[[BUFFER]] : !util.buffer
// CHECK: %[[DV:.*]] = arith.floordivsi %[[SZ]], %[[C2]] : index
%0 = memref.dim %buffer, %idx0 : memref<?xi16>
- // CHECK: return %[[DV]]
- return %0 : index
+ // CHECK: util.return %[[DV]]
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @dim_index
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[IDX0:.+]]: index) -> index {
-func.func @dim_index(%buffer: memref<?xindex>, %idx0: index) -> index {
+util.func @dim_index(%buffer: memref<?xindex>, %idx0: index) -> index {
// CHECK: %[[SIZEOF:.*]] = util.sizeof index
// CHECK: %[[SZ:.*]] = util.buffer.size %[[BUFFER]] : !util.buffer
// CHECK: %[[DV:.*]] = arith.floordivsi %[[SZ]], %[[SIZEOF]] : index
%0 = memref.dim %buffer, %idx0 : memref<?xindex>
- // CHECK: return %[[DV]]
- return %0 : index
+ // CHECK: util.return %[[DV]]
+ util.return %0 : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/test/structural_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/Conversion/test/structural_ops.mlir
index 357d55f..b815163 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/test/structural_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/test/structural_ops.mlir
@@ -5,69 +5,69 @@
// CHECK: util.initializer
util.initializer {
- // CHECK: %[[VALUE:.+]] = func.call @extern
- %value = func.call @extern() : () -> memref<?xi8>
+ // CHECK: %[[VALUE:.+]] = util.call @extern
+ %value = util.call @extern() : () -> memref<?xi8>
// CHECK: cf.br ^bb1(%[[VALUE]] : !util.buffer)
cf.br ^bb1(%value : memref<?xi8>)
// CHECK: ^bb1(%[[ARG:.+]]: !util.buffer)
^bb1(%block_arg: memref<?xi8>):
util.return
}
-func.func private @extern() -> memref<?xi8>
+util.func private @extern() -> memref<?xi8>
// -----
// CHECK-LABEL: @funcOp
// CHECK-SAME: (%[[ARG0:.+]]: !util.buffer) -> !util.buffer
-func.func @funcOp(%arg0: memref<?xi8>) -> memref<?xi8> {
- // CHECK: return %[[ARG0]] : !util.buffer
- return %arg0 : memref<?xi8>
+util.func public @funcOp(%arg0: memref<?xi8>) -> memref<?xi8> {
+ // CHECK: util.return %[[ARG0]] : !util.buffer
+ util.return %arg0 : memref<?xi8>
}
// -----
// CHECK-LABEL: @callOp
// CHECK-SAME: (%[[ARG0:.+]]: !util.buffer) -> !util.buffer
-func.func @callOp(%arg0: memref<?xi8>) -> memref<?xi8> {
- // CHECK: %[[RET0:.+]] = call @extern(%[[ARG0]]) : (!util.buffer) -> !util.buffer
- %ret0 = call @extern(%arg0) : (memref<?xi8>) -> memref<?xi8>
- // CHECK: return %[[RET0]] : !util.buffer
- return %ret0 : memref<?xi8>
+util.func public @callOp(%arg0: memref<?xi8>) -> memref<?xi8> {
+ // CHECK: %[[RET0:.+]] = util.call @extern(%[[ARG0]]) : (!util.buffer) -> !util.buffer
+ %ret0 = util.call @extern(%arg0) : (memref<?xi8>) -> memref<?xi8>
+ // CHECK: util.return %[[RET0]] : !util.buffer
+ util.return %ret0 : memref<?xi8>
}
-// CHECK: func.func private @extern(!util.buffer) -> !util.buffer
-func.func private @extern(memref<?xi8>) -> memref<?xi8>
+// CHECK: util.func private @extern(%arg0: !util.buffer) -> !util.buffer
+util.func private @extern(memref<?xi8>) -> memref<?xi8>
// -----
// CHECK-LABEL: @brOp
// CHECK-SAME: (%[[ARG0:.+]]: !util.buffer) -> !util.buffer
-func.func @brOp(%arg0: memref<?xi8>) -> memref<?xi8> {
+util.func public @brOp(%arg0: memref<?xi8>) -> memref<?xi8> {
// CHECK: cf.br ^bb1(%[[ARG0]] : !util.buffer)
cf.br ^bb1(%arg0 : memref<?xi8>)
// CHECK: ^bb1(%[[BB1_ARG0:.+]]: !util.buffer):
^bb1(%bb1_arg0: memref<?xi8>):
- // CHECK: return %[[BB1_ARG0]] : !util.buffer
- return %bb1_arg0 : memref<?xi8>
+ // CHECK: util.return %[[BB1_ARG0]] : !util.buffer
+ util.return %bb1_arg0 : memref<?xi8>
}
// -----
// CHECK-LABEL: @condBrOp
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG0:.+]]: !util.buffer, %[[ARG1:.+]]: !util.buffer) -> !util.buffer
-func.func @condBrOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
+util.func public @condBrOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
// CHECK: cf.cond_br %[[COND]], ^bb1(%[[ARG0]] : !util.buffer), ^bb1(%[[ARG1]] : !util.buffer)
cf.cond_br %cond, ^bb1(%arg0 : memref<?xi8>), ^bb1(%arg1 : memref<?xi8>)
// CHECK: ^bb1(%[[BB1_ARG0:.+]]: !util.buffer):
^bb1(%bb1_arg0 : memref<?xi8>):
- // CHECK: return %[[BB1_ARG0]] : !util.buffer
- return %bb1_arg0 : memref<?xi8>
+ // CHECK: util.return %[[BB1_ARG0]] : !util.buffer
+ util.return %bb1_arg0 : memref<?xi8>
}
// -----
// CHECK-LABEL: @switchOp
// CHECK-SAME: (%[[FLAG:.+]]: i32, %[[ARG0:.+]]: !util.buffer, %[[ARG1:.+]]: !util.buffer) -> !util.buffer
-func.func @switchOp(%flag: i32, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
+util.func public @switchOp(%flag: i32, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
// CHECK: cf.switch %[[FLAG]] : i32, [
// CHECK: default: ^bb1(%[[ARG0]] : !util.buffer),
// CHECK: 0: ^bb1(%[[ARG1]] : !util.buffer)
@@ -78,26 +78,26 @@
]
// CHECK: ^bb1(%[[BB1_ARG0:.+]]: !util.buffer):
^bb1(%bb1_arg0 : memref<?xi8>):
- // CHECK: return %[[BB1_ARG0]] : !util.buffer
- return %bb1_arg0 : memref<?xi8>
+ // CHECK: util.return %[[BB1_ARG0]] : !util.buffer
+ util.return %bb1_arg0 : memref<?xi8>
}
// -----
// CHECK-LABEL: @selectOp
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG0:.+]]: !util.buffer, %[[ARG1:.+]]: !util.buffer) -> !util.buffer
-func.func @selectOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
+util.func public @selectOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
// CHECK: %[[RET0:.+]] = arith.select %[[COND]], %[[ARG0]], %[[ARG1]] : !util.buffer
%ret0 = arith.select %cond, %arg0, %arg1 : memref<?xi8>
- // CHECK: return %[[RET0]] : !util.buffer
- return %ret0 : memref<?xi8>
+ // CHECK: util.return %[[RET0]] : !util.buffer
+ util.return %ret0 : memref<?xi8>
}
// -----
// CHECK-LABEL: @ifOp
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG0:.+]]: !util.buffer, %[[ARG1:.+]]: !util.buffer) -> !util.buffer
-func.func @ifOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
+util.func public @ifOp(%cond: i1, %arg0: memref<?xi8>, %arg1: memref<?xi8>) -> memref<?xi8> {
// CHECK: %[[RET0:.+]] = scf.if %[[COND]] -> (!util.buffer)
%ret0 = scf.if %cond -> (memref<?xi8>) {
// CHECK: scf.yield %[[ARG0]] : !util.buffer
@@ -106,6 +106,6 @@
// CHECK: scf.yield %[[ARG1]] : !util.buffer
scf.yield %arg1 : memref<?xi8>
}
- // CHECK: return %[[RET0]] : !util.buffer
- return %ret0 : memref<?xi8>
+ // CHECK: util.return %[[RET0]] : !util.buffer
+ util.return %ret0 : memref<?xi8>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
index c1900fe..9dc9b18 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilDialect.cpp
@@ -112,9 +112,13 @@
UtilDialect::UtilDialect(MLIRContext *context)
: Dialect(getDialectNamespace(), context, TypeID::get<UtilDialect>()) {
+ context->loadDialect<arith::ArithDialect>();
+
addInterfaces<UtilOpAsmInterface, UtilInlinerInterface>();
+
registerAttributes();
registerTypes();
+
#define GET_OP_LIST
addOperations<
#include "iree/compiler/Dialect/Util/IR/UtilOps.cpp.inc"
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
index 5e39aad..691e5d5 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilExternalModels.cpp
@@ -18,88 +18,9 @@
namespace {
-// Since all details of the interface are provided via default implementations,
-// we can just have one templated external model to apply per op, vs one
-// explicit model per op.
-struct GenericNumericCastExternalModel {
- template <typename OpTy>
- struct ExternalModel
- : public NumericCastOpInterface::ExternalModel<ExternalModel<OpTy>,
- OpTy> {};
-
- template <typename OpTy>
- static void add(MLIRContext *ctx) {
- OpTy::template attachInterface<ExternalModel<OpTy>>(*ctx);
- }
-
- template <typename OpTy1, typename OpTy2, typename... More>
- static void add(MLIRContext *ctx) {
- add<OpTy1>(ctx);
- add<OpTy2, More...>(ctx);
- }
-};
-
-struct InsertSliceOpTiedOpInterface
- : public TiedOpInterface::ExternalModel<InsertSliceOpTiedOpInterface,
- tensor::InsertSliceOp> {
- Value getTiedResult(Operation *op, unsigned resultIndex) const {
- auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
- return IREE::Util::TiedOpInterface::findTiedBaseValue(
- insertSliceOp.getDest());
- }
-
- ::std::optional<unsigned>
- getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
- return {1}; // dest
- }
-
- SmallVector<int64_t> getTiedResultOperandIndices(Operation *op) const {
- return {1}; // dest
- }
-};
-
-template <typename OpTy>
-struct LinalgOpTiedOpInterface
- : public TiedOpInterface::ExternalModel<LinalgOpTiedOpInterface<OpTy>,
- OpTy> {
- Value getTiedResult(Operation *op, unsigned resultIndex) const {
- auto linalgOp = cast<OpTy>(op);
- return IREE::Util::TiedOpInterface::findTiedBaseValue(
- linalgOp.getDpsInits()[resultIndex]);
- }
-
- ::std::optional<unsigned>
- getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
- auto linalgOp = cast<OpTy>(op);
- return {linalgOp.getDpsInitsMutable()[resultIndex].getOperandNumber()};
- }
-
- SmallVector<int64_t> getTiedResultOperandIndices(Operation *op) const {
- SmallVector<int64_t> result;
- for (unsigned i = 0; i < op->getNumResults(); ++i)
- result.push_back(*getTiedResultOperandIndex(op, i));
- return result;
- }
-};
-
-/// Helper structure that iterates over all LinalgOps in `OpTys` and registers
-/// the `TiedOpInterface` with each of them.
-template <typename... Ops>
-struct LinalgOpTiedOpInterfaceHelper {
- static void registerOpInterface(MLIRContext *ctx) {
- (void)std::initializer_list<int>{
- 0, (Ops::template attachInterface<LinalgOpTiedOpInterface<Ops>>(*ctx),
- 0)...};
- }
-};
-
struct GlobalOpInterfaceExternalModel
: public GlobalOpInterface::ExternalModel<GlobalOpInterfaceExternalModel,
ml_program::GlobalOp> {
- static void add(MLIRContext *ctx) {
- ml_program::GlobalOp::attachInterface<GlobalOpInterfaceExternalModel>(*ctx);
- }
-
Attribute getGlobalInitialValue(Operation *op) const {
return cast<ml_program::GlobalOp>(op).getValueAttr();
}
@@ -157,61 +78,146 @@
}
};
+// Since all details of the interface are provided via default implementations,
+// we can just have one templated external model to apply per op, vs one
+// explicit model per op.
+struct GenericNumericCastExternalModel {
+ template <typename OpTy>
+ struct ExternalModel
+ : public NumericCastOpInterface::ExternalModel<ExternalModel<OpTy>,
+ OpTy> {};
+
+ template <typename OpTy>
+ static void add(MLIRContext *context) {
+ OpTy::template attachInterface<ExternalModel<OpTy>>(*context);
+ }
+
+ template <typename OpTy1, typename OpTy2, typename... More>
+ static void add(MLIRContext *context) {
+ add<OpTy1>(context);
+ add<OpTy2, More...>(context);
+ }
+};
+
+struct InsertSliceOpTiedOpInterface
+ : public TiedOpInterface::ExternalModel<InsertSliceOpTiedOpInterface,
+ tensor::InsertSliceOp> {
+ Value getTiedResult(Operation *op, unsigned resultIndex) const {
+ auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
+ return IREE::Util::TiedOpInterface::findTiedBaseValue(
+ insertSliceOp.getDest());
+ }
+
+ ::std::optional<unsigned>
+ getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
+ return {1}; // dest
+ }
+
+ SmallVector<int64_t> getTiedResultOperandIndices(Operation *op) const {
+ return {1}; // dest
+ }
+};
+
+template <typename OpTy>
+struct LinalgOpTiedOpInterface
+ : public TiedOpInterface::ExternalModel<LinalgOpTiedOpInterface<OpTy>,
+ OpTy> {
+ Value getTiedResult(Operation *op, unsigned resultIndex) const {
+ auto linalgOp = cast<OpTy>(op);
+ return IREE::Util::TiedOpInterface::findTiedBaseValue(
+ linalgOp.getDpsInits()[resultIndex]);
+ }
+
+ ::std::optional<unsigned>
+ getTiedResultOperandIndex(Operation *op, unsigned resultIndex) const {
+ auto linalgOp = cast<OpTy>(op);
+ return {linalgOp.getDpsInitsMutable()[resultIndex].getOperandNumber()};
+ }
+
+ SmallVector<int64_t> getTiedResultOperandIndices(Operation *op) const {
+ SmallVector<int64_t> result;
+ for (unsigned i = 0; i < op->getNumResults(); ++i)
+ result.push_back(*getTiedResultOperandIndex(op, i));
+ return result;
+ }
+};
+
+/// Helper structure that iterates over all LinalgOps in `OpTys` and registers
+/// the `TiedOpInterface` with each of them.
+template <typename... Ops>
+struct LinalgOpTiedOpInterfaceHelper {
+ static void registerOpInterface(MLIRContext *context) {
+ (void)std::initializer_list<int>{
+ 0,
+ (Ops::template attachInterface<LinalgOpTiedOpInterface<Ops>>(*context),
+ 0)...};
+ }
+};
+
} // namespace
void registerUtilExternalModels(DialectRegistry ®istry) {
// Must ensure that any dependent dialects are registered.
- registry.insert<arith::ArithDialect, linalg::LinalgDialect,
- ml_program::MLProgramDialect, tensor::TensorDialect>();
+ registry.insert<arith::ArithDialect>();
+ registry.insert<linalg::LinalgDialect>();
+ registry.insert<ml_program::MLProgramDialect>();
+ registry.insert<tensor::TensorDialect>();
- registry.addExtension(+[](MLIRContext *ctx,
- ml_program::MLProgramDialect *dialect) {
- ml_program::GlobalOp::attachInterface<GlobalOpInterfaceExternalModel>(*ctx);
- });
+ registry.addExtension(
+ +[](MLIRContext *context, ml_program::MLProgramDialect *dialect) {
+ ml_program::GlobalOp::attachInterface<GlobalOpInterfaceExternalModel>(
+ *context);
+ });
- registry.addExtension(+[](MLIRContext *ctx, arith::ArithDialect *dialect) {
+ registry.addExtension(+[](MLIRContext *context,
+ arith::ArithDialect *dialect) {
GenericNumericCastExternalModel::add<
arith::BitcastOp, arith::ExtFOp, arith::ExtUIOp, arith::ExtSIOp,
arith::FPToSIOp, arith::FPToUIOp, arith::IndexCastOp, arith::TruncFOp,
- arith::TruncIOp, arith::SIToFPOp, arith::UIToFPOp>(ctx);
+ arith::TruncIOp, arith::SIToFPOp, arith::UIToFPOp>(context);
});
- registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) {
- tensor::InsertSliceOp::attachInterface<InsertSliceOpTiedOpInterface>(*ctx);
- });
+ registry.addExtension(
+ +[](MLIRContext *context, tensor::TensorDialect *dialect) {
+ tensor::InsertSliceOp::attachInterface<InsertSliceOpTiedOpInterface>(
+ *context);
+ });
- registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
- // Register all Linalg structured ops. `LinalgOp` is an interface and it is
- // not possible to attach an external interface to an existing interface.
- // Therefore, attach the `TiedOpInterface` to all ops one-by-one.
- LinalgOpTiedOpInterfaceHelper<
+ registry.addExtension(
+ +[](MLIRContext *context, linalg::LinalgDialect *dialect) {
+ // Register all Linalg structured ops. `LinalgOp` is an interface and it
+ // is not possible to attach an external interface to an existing
+ // interface. Therefore, attach the `TiedOpInterface` to all ops
+ // one-by-one.
+ LinalgOpTiedOpInterfaceHelper<
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
- >::registerOpInterface(ctx);
- });
+ >::registerOpInterface(context);
+ });
// TODO(matthias-springer): Use a helper instead of listing all ops. This is
// tricky because LinalgExtOps.td includes YieldOp.
- registry.addExtension(+[](MLIRContext *ctx,
+ registry.addExtension(+[](MLIRContext *context,
LinalgExt::IREELinalgExtDialect *dialect) {
LinalgExt::ScatterOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::ScatterOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::ScatterOp>>(*context);
LinalgExt::SortOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::SortOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::SortOp>>(*context);
LinalgExt::FftOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::FftOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::FftOp>>(*context);
LinalgExt::ScanOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::ScanOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::ScanOp>>(*context);
LinalgExt::ReverseOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::ReverseOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::ReverseOp>>(*context);
LinalgExt::TopkOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::TopkOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::TopkOp>>(*context);
LinalgExt::WinogradInputTransformOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::WinogradInputTransformOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::WinogradInputTransformOp>>(*context);
LinalgExt::WinogradOutputTransformOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::WinogradOutputTransformOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::WinogradOutputTransformOp>>(
+ *context);
LinalgExt::AttentionOp::attachInterface<
- LinalgOpTiedOpInterface<LinalgExt::AttentionOp>>(*ctx);
+ LinalgOpTiedOpInterface<LinalgExt::AttentionOp>>(*context);
});
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
index 3b9e0ad..83a6bd8 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
@@ -723,6 +723,17 @@
let methods = [
InterfaceMethod<
/*desc=*/[{
+ }],
+ /*retTy=*/"void",
+ /*methodName=*/"getAllTiedOperands",
+ /*args=*/(ins "SmallVectorImpl<int64_t> &":$indices),
+ /*methodBody=*/[{}],
+ /*defaultImplementation=*/[{
+ IREE::Util::detail::getAllTiedOperands($_op, indices);
+ }]
+ >,
+ InterfaceMethod<
+ /*desc=*/[{
Returns the set of operands that results may be tied to as an
(index, length) pair ala getODSOperandIndexAndLength.
@@ -733,8 +744,9 @@
of successor operands.
}],
/*retTy=*/"std::pair<unsigned, unsigned>",
- /*methodName=*/"getTiedOperandsIndexAndLength", (ins),
- /*args=*/[{}],
+ /*methodName=*/"getTiedOperandsIndexAndLength",
+ /*args=*/(ins),
+ /*methodBody=*/[{}],
/*defaultImplementation=*/[{
return {0, $_op->getNumOperands()};
}]
@@ -749,8 +761,9 @@
ones it will tie.
}],
/*retTy=*/"std::pair<unsigned, unsigned>",
- /*methodName=*/"getTiedResultsIndexAndLength", (ins),
- /*args=*/[{}],
+ /*methodName=*/"getTiedResultsIndexAndLength",
+ /*args=*/(ins),
+ /*methodBody=*/[{}],
/*defaultImplementation=*/[{
return {0, $_op->getNumResults()};
}]
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
index 75ea2c3..bc59c87 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
@@ -327,12 +327,16 @@
SmallVectorImpl<Type> &operandTypes) {
if (failed(parser.parseLParen()))
return failure();
- while (!succeeded(parser.parseOptionalRParen())) {
+ if (succeeded(parser.parseOptionalRParen()))
+ return success(); // empty
+ do {
Type type;
if (failed(parser.parseType(type)))
return failure();
operandTypes.push_back(type);
- }
+ } while (succeeded(parser.parseOptionalComma()));
+ if (failed(parser.parseRParen()))
+ return failure();
return success();
}
@@ -1262,8 +1266,9 @@
OpBuilder builder(location->getContext());
OperationState state(location, getOperationName());
FuncOp::build(builder, state, name, type,
- builder.getIndexArrayAttr(tiedOperands), attrs, argAttrs,
- resAttrs);
+ tiedOperands.empty() ? ArrayAttr{}
+ : builder.getIndexArrayAttr(tiedOperands),
+ attrs, argAttrs, resAttrs);
return cast<FuncOp>(Operation::create(state));
}
@@ -1275,12 +1280,14 @@
state.addAttribute(SymbolTable::getSymbolAttrName(),
builder.getStringAttr(name));
state.addAttribute(SymbolTable::getVisibilityAttrName(),
- builder.getStringAttr("private"));
+ builder.getStringAttr("public"));
state.addAttribute("function_type", TypeAttr::get(type));
state.attributes.append(attrs.begin(), attrs.end());
state.attributes.erase(IREE::Util::TiedOpInterface::getStorageAttrName());
- state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
- tiedOperands);
+ if (tiedOperands) {
+ state.addAttribute(IREE::Util::TiedOpInterface::getStorageAttrName(),
+ tiedOperands);
+ }
state.addRegion();
if (!argAttrs.empty() || !resAttrs.empty()) {
assert(type.getNumInputs() == argAttrs.size());
@@ -1417,6 +1424,72 @@
}
}
+bool IREE::Util::FuncOp::hasAnyTiedOperands() {
+ auto tiedOperandsAttr = getTiedOperandsAttr();
+ if (!tiedOperandsAttr)
+ return false;
+ return llvm::any_of(
+ tiedOperandsAttr.getAsRange<IntegerAttr>(), [](IntegerAttr attr) {
+ return attr.getInt() != IREE::Util::TiedOpInterface::kUntiedIndex;
+ });
+}
+
+void IREE::Util::FuncOp::expandSignature(
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandArgument,
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandResult) {
+ auto oldType = getFunctionType();
+
+ SmallVector<DictionaryAttr> oldArgumentAttrs;
+ getAllArgAttrs(oldArgumentAttrs);
+ SmallVector<DictionaryAttr> oldResultAttrs;
+ getAllResultAttrs(oldResultAttrs);
+
+ SmallVector<int64_t> adjustedTiedOperands;
+ IREE::Util::detail::getAllTiedOperands(getOperation(), adjustedTiedOperands);
+
+ SmallVector<Type> newArgumentTypes;
+ SmallVector<DictionaryAttr> newArgumentAttrs;
+ for (auto [oldIndex, argType] : llvm::enumerate(oldType.getInputs())) {
+ size_t newIndex = newArgumentTypes.size();
+ expandArgument(oldIndex, argType, newArgumentTypes);
+ size_t expandedCount = newArgumentTypes.size() - newIndex;
+ for (size_t i = 0; i < adjustedTiedOperands.size(); ++i) {
+ if (adjustedTiedOperands[i] == oldIndex)
+ adjustedTiedOperands[i] = newIndex;
+ }
+ newArgumentAttrs.push_back(oldArgumentAttrs[oldIndex]);
+ newArgumentAttrs.append(expandedCount - 1,
+ DictionaryAttr::get(getContext()));
+ }
+
+ SmallVector<Type> newResultTypes;
+ SmallVector<int64_t> newTiedOperands;
+ SmallVector<DictionaryAttr> newResultAttrs;
+ for (auto [oldIndex, resultType] : llvm::enumerate(oldType.getResults())) {
+ size_t newIndex = newResultTypes.size();
+ expandResult(oldIndex, resultType, newResultTypes);
+ size_t expandedCount = newResultTypes.size() - newIndex;
+ newTiedOperands.push_back(adjustedTiedOperands[oldIndex]);
+ newTiedOperands.append(expandedCount - 1,
+ IREE::Util::TiedOpInterface::kUntiedIndex);
+ newResultAttrs.push_back(oldResultAttrs[oldIndex]);
+ newResultAttrs.append(expandedCount - 1, DictionaryAttr::get(getContext()));
+ }
+
+ auto newType =
+ FunctionType::get(getContext(), newArgumentTypes, newResultTypes);
+ if (newType != oldType) {
+ setFunctionType(newType);
+ setTiedOperandsAttr(ArrayAttr::get(
+ getContext(),
+ llvm::map_to_vector<8>(newTiedOperands, [&](int64_t v) -> Attribute {
+ return IntegerAttr::get(IndexType::get(getContext()), v);
+ })));
+ setAllArgAttrs(newArgumentAttrs);
+ setAllResultAttrs(newResultAttrs);
+ }
+}
+
//===----------------------------------------------------------------------===//
// util.call
//===----------------------------------------------------------------------===//
@@ -1425,6 +1498,24 @@
return FunctionType::get(getContext(), getOperandTypes(), getResultTypes());
}
+static bool areTiedOperandsEqual(ArrayAttr a, ArrayAttr b) {
+ auto hasAnyTied = [](ArrayAttr tiedOperandsAttr) {
+ if (!tiedOperandsAttr)
+ return false;
+ return llvm::any_of(
+ tiedOperandsAttr.getAsRange<IntegerAttr>(), [](IntegerAttr attr) {
+ return attr.getInt() != IREE::Util::TiedOpInterface::kUntiedIndex;
+ });
+ };
+ bool hasAnyTiedA = hasAnyTied(a);
+ bool hasAnyTiedB = hasAnyTied(b);
+ if (hasAnyTiedA != hasAnyTiedB)
+ return false;
+ if (!a || !b)
+ return true;
+ return a == b;
+}
+
LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
Operation *op = getOperation();
@@ -1444,16 +1535,50 @@
}
// Ensure tied operands are consistent.
- auto expectedTiedOperands = getTiedOperandsAttr();
+ auto callerTiedOperands = getTiedOperandsAttr();
auto calleeTiedOperands = calleeOp.getTiedOperandsAttr();
- if (calleeTiedOperands != expectedTiedOperands) {
- return emitOpError("function tied operands mismatch; expected ")
- << expectedTiedOperands << " but callee is " << calleeTiedOperands;
+ if (!areTiedOperandsEqual(calleeTiedOperands, callerTiedOperands)) {
+ return emitOpError("function tied operands mismatch; have ")
+ << callerTiedOperands << " but callee is " << calleeTiedOperands;
}
return success();
}
+IREE::Util::CallOp IREE::Util::CallOp::cloneAndExpand(
+ std::function<void(unsigned, Value, SmallVectorImpl<Value> &)>
+ expandOperand,
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandResult,
+ OpBuilder &builder) {
+ SmallVector<int64_t> adjustedTiedOperands;
+ IREE::Util::detail::getAllTiedOperands(getOperation(), adjustedTiedOperands);
+
+ SmallVector<Value> newOperands;
+ for (auto [oldIndex, operand] : llvm::enumerate(getOperands())) {
+ size_t newIndex = newOperands.size();
+ expandOperand(oldIndex, operand, newOperands);
+ for (size_t i = 0; i < adjustedTiedOperands.size(); ++i) {
+ if (adjustedTiedOperands[i] == oldIndex)
+ adjustedTiedOperands[i] = newIndex;
+ }
+ }
+
+ SmallVector<Type> newResultTypes;
+ SmallVector<int64_t> newTiedOperands;
+ for (auto [oldIndex, resultType] : llvm::enumerate(getResultTypes())) {
+ size_t newIndex = newResultTypes.size();
+ expandResult(oldIndex, resultType, newResultTypes);
+ size_t expandedCount = newResultTypes.size() - newIndex;
+ newTiedOperands.push_back(adjustedTiedOperands[oldIndex]);
+ newTiedOperands.append(expandedCount - 1,
+ IREE::Util::TiedOpInterface::kUntiedIndex);
+ }
+
+ return builder.create<IREE::Util::CallOp>(
+ getLoc(), newResultTypes, getCallee(), newOperands,
+ builder.getIndexArrayAttr(newTiedOperands));
+}
+
//===----------------------------------------------------------------------===//
// util.return
//===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
index 823558d..bf9f7a8 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
@@ -634,6 +634,22 @@
ArrayRef<Type> getArgumentTypes() { return getFunctionType().getInputs(); }
ArrayRef<Type> getResultTypes() { return getFunctionType().getResults(); }
+
+ // Returns true if any operand is tied to a result.
+ bool hasAnyTiedOperands();
+
+ // Updates the function signature to potentially expand each argument and
+ // result. Only the signature and the metadata on the function (tied
+ // operands, argument/result attrs, etc) are updated and the body region
+ // remains unchanged.
+ //
+ // Any type that may be tied must remain in the same relative order (expand
+ // by appending types after the base type).
+ //
+ // If |newSignature| is provided
+ void expandSignature(
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandArgument,
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandResult);
}];
let hasCustomAssemblyFormat = 1;
@@ -671,6 +687,19 @@
Variadic<AnyType>:$results
);
+ let builders = [
+ OpBuilder<(ins
+ CArg<"FunctionOpInterface">:$callee,
+ CArg<"ValueRange">:$operands,
+ CArg<"ArrayAttr", "{}">:$tied_operands,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs
+ ), [{
+ build($_builder, $_state, callee.getResultTypes(), callee.getName(),
+ operands, tied_operands);
+ $_state.addAttributes(attrs);
+ }]>,
+ ];
+
let assemblyFormat = [{
$callee `(` $operands `)`
attr-dict `:`
@@ -696,6 +725,15 @@
void setCalleeFromCallable(CallInterfaceCallable callee) {
(*this)->setAttr("callee", callee.get<SymbolRefAttr>());
}
+
+ // Clones the call and potentially expands each operand and result.
+ // Callers can then replace result uses using the returned op.
+ // Any type that may be tied must remain in the same relative order (expand
+ // by appending types after the base type).
+ IREE::Util::CallOp cloneAndExpand(
+ std::function<void(unsigned, Value, SmallVectorImpl<Value> &)> expandOperand,
+ std::function<void(unsigned, Type, SmallVectorImpl<Type> &)> expandResult,
+ OpBuilder &builder);
}];
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
index d9c4a92..3d0d28e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
@@ -228,6 +228,17 @@
return false;
}
+bool isPublicOrExternal(CallableOpInterface callableOp) {
+ if (auto symbolOp = dyn_cast<SymbolOpInterface>(callableOp.getOperation())) {
+ if (symbolOp.isPublic())
+ return true;
+ }
+ auto *region = callableOp.getCallableRegion();
+ if (!region || region->empty())
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Global and structural interface utilities
//===----------------------------------------------------------------------===//
@@ -350,6 +361,22 @@
// IREE::Util::TiedOpInterface
//===----------------------------------------------------------------------===//
+void detail::getAllTiedOperands(Operation *op,
+ SmallVectorImpl<int64_t> &indices) {
+ if (auto tiedOperandsAttr = op->getAttrOfType<ArrayAttr>(
+ IREE::Util::TiedOpInterface::getStorageAttrName())) {
+ for (auto indexAttr : tiedOperandsAttr.getAsRange<IntegerAttr>()) {
+ indices.push_back(indexAttr.getInt());
+ }
+ } else if (auto tiedOp = dyn_cast<IREE::Util::TiedOpInterface>(op)) {
+ indices.assign(op->getNumResults(),
+ IREE::Util::TiedOpInterface::kUntiedIndex);
+ } else if (auto callableOp = dyn_cast<CallableOpInterface>(op)) {
+ indices.assign(callableOp.getResultTypes().size(),
+ IREE::Util::TiedOpInterface::kUntiedIndex);
+ }
+}
+
std::optional<unsigned>
detail::getTiedResultOperandIndex(Operation *op, unsigned resultIndex) {
auto storageAttr = op->getAttrOfType<ArrayAttr>(
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
index 230c87d..5ac353e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
@@ -20,6 +20,7 @@
#include "mlir/IR/TypeSupport.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Types.h"
+#include "mlir/Interfaces/CallInterfaces.h"
// clang-format off: must be included after all LLVM/MLIR headers.
#include "iree/compiler/Dialect/Util/IR/UtilEnums.h.inc" // IWYU pragma: keep
@@ -104,6 +105,11 @@
// Returns true if the move was successful.
bool tryMoveProducerBefore(Value value, Operation *consumerOp);
+// Returns true if the given callable op is public or external (no body).
+// Such callables cannot have their signature changed without (potentially)
+// breaking linking.
+bool isPublicOrExternal(CallableOpInterface callableOp);
+
//===----------------------------------------------------------------------===//
// Global and structural interface utilities
//===----------------------------------------------------------------------===//
@@ -130,6 +136,7 @@
namespace detail {
+void getAllTiedOperands(Operation *op, SmallVectorImpl<int64_t> &indices);
std::optional<unsigned> getTiedResultOperandIndex(Operation *op,
unsigned resultIndex);
void setTiedResultOperandIndex(Operation *op, unsigned resultIndex,
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_folding.mlir
index 3477dbe..522c13c 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_folding.mlir
@@ -2,65 +2,65 @@
// CHECK-LABEL: @foldSameAlignment
// CHECK-SAME: (%[[VALUE:.+]]: index, %[[ALIGNMENT:.+]]: index)
-func.func @foldSameAlignment(%value: index, %alignment: index) -> index {
+util.func public @foldSameAlignment(%value: index, %alignment: index) -> index {
// CHECK: %[[RET:.+]] = util.align %[[VALUE]], %[[ALIGNMENT]]
%0 = util.align %value, %alignment : index
// CHECK-NOT: util.align
%1 = util.align %0, %alignment : index
- // CHECK: return %[[RET]]
- return %1 : index
+ // CHECK: util.return %[[RET]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @foldGreaterAlignment
// CHECK-SAME: (%[[VALUE:.+]]: index)
-func.func @foldGreaterAlignment(%value: index) -> index {
+util.func public @foldGreaterAlignment(%value: index) -> index {
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
// CHECK: %[[RET:.+]] = util.align %[[VALUE]], %c16
%0 = util.align %value, %c16 : index
// CHECK-NOT: util.align
%1 = util.align %0, %c8 : index
- // CHECK: return %[[RET]]
- return %1 : index
+ // CHECK: util.return %[[RET]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @dontFoldLesserAlignment
// CHECK-SAME: (%[[VALUE:.+]]: index)
-func.func @dontFoldLesserAlignment(%value: index) -> index {
+util.func public @dontFoldLesserAlignment(%value: index) -> index {
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
// CHECK: %[[ALIGN16:.+]] = util.align %[[VALUE]], %c8
%0 = util.align %value, %c8 : index
// CHECK: %[[ALIGN8:.+]] = util.align %[[ALIGN16]], %c16
%1 = util.align %0, %c16 : index
- // CHECK: return %[[ALIGN8]]
- return %1 : index
+ // CHECK: util.return %[[ALIGN8]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @dontFoldMixedAlignment
// CHECK-SAME: (%[[VALUE:.+]]: index)
-func.func @dontFoldMixedAlignment(%value: index) -> index {
+util.func public @dontFoldMixedAlignment(%value: index) -> index {
%c9 = arith.constant 9 : index
%c16 = arith.constant 16 : index
// CHECK: %[[ALIGN16:.+]] = util.align %[[VALUE]], %c16
%0 = util.align %value, %c16 : index
// CHECK: %[[ALIGN9:.+]] = util.align %[[ALIGN16]], %c9
%1 = util.align %0, %c9 : index
- // CHECK: return %[[ALIGN9]]
- return %1 : index
+ // CHECK: util.return %[[ALIGN9]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @foldAlignmentRecursively
// CHECK-SAME: (%[[VALUE:.+]]: index, %[[ALIGNMENT:.+]]: index)
-func.func @foldAlignmentRecursively(%value: index, %alignment: index) -> index {
+util.func public @foldAlignmentRecursively(%value: index, %alignment: index) -> index {
%c16 = arith.constant 16 : index
// CHECK: %[[ALIGN16:.+]] = util.align %[[VALUE]], %c16
%0 = util.align %value, %c16 : index
@@ -68,15 +68,15 @@
%1 = util.align %0, %alignment : index
// CHECK-NOT: util.align
%2 = util.align %1, %c16 : index
- // CHECK: return %[[ALIGN_DYNAMIC]]
- return %2 : index
+ // CHECK: util.return %[[ALIGN_DYNAMIC]]
+ util.return %2 : index
}
// -----
// CHECK-LABEL: @foldAddAlignment
// CHECK-SAME: (%[[LHS:.+]]: index, %[[RHS:.+]]: index, %[[ALIGNMENT:.+]]: index)
-func.func @foldAddAlignment(%lhs: index, %rhs: index, %alignment: index) -> index {
+util.func public @foldAddAlignment(%lhs: index, %rhs: index, %alignment: index) -> index {
// CHECK: %[[LHS_ALIGNED:.+]] = util.align %[[LHS]], %[[ALIGNMENT]]
%lhs_aligned = util.align %lhs, %alignment : index
// CHECK: %[[RHS_ALIGNED:.+]] = util.align %[[RHS]], %[[ALIGNMENT]]
@@ -85,15 +85,15 @@
%sum_aligned = arith.addi %lhs_aligned, %rhs_aligned : index
// CHECK-NOT: util.align
%result = util.align %sum_aligned, %alignment : index
- // CHECK: return %[[SUM_ALIGNED]]
- return %result : index
+ // CHECK: util.return %[[SUM_ALIGNED]]
+ util.return %result : index
}
// -----
// CHECK-LABEL: @foldAddAlignmentConstant
// CHECK-SAME: (%[[LHS:.+]]: index)
-func.func @foldAddAlignmentConstant(%lhs: index) -> index {
+util.func public @foldAddAlignmentConstant(%lhs: index) -> index {
%c16 = arith.constant 16 : index
%c32 = arith.constant 32 : index
%c64 = arith.constant 64 : index
@@ -103,29 +103,29 @@
%sum_aligned = arith.addi %lhs_aligned, %c32 : index
// CHECK-NOT: util.align
%result = util.align %sum_aligned, %c16 : index
- // CHECK: return %[[SUM_ALIGNED]]
- return %result : index
+ // CHECK: util.return %[[SUM_ALIGNED]]
+ util.return %result : index
}
// -----
// CHECK-LABEL: @foldMulAlignmentConstant
// CHECK-SAME: (%[[LHS:.+]]: index)
-func.func @foldMulAlignmentConstant(%lhs: index) -> index {
+util.func public @foldMulAlignmentConstant(%lhs: index) -> index {
%c64 = arith.constant 64 : index
%c2048 = arith.constant 2048 : index
// CHECK: %[[RESULT:.+]] = arith.muli %[[LHS]], %c2048
%lhs_mul = arith.muli %lhs, %c2048 : index
// CHECK-NOT: util.align
%result = util.align %lhs_mul, %c64 : index
- // CHECK: return %[[RESULT]]
- return %result : index
+ // CHECK: util.return %[[RESULT]]
+ util.return %result : index
}
// -----
// CHECK-LABEL: @foldConstantAlign
-func.func @foldConstantAlign() -> (index, index, index) {
+util.func public @foldConstantAlign() -> (index, index, index) {
%c0 = arith.constant 0 : index
%c7 = arith.constant 7 : index
%c8 = arith.constant 8 : index
@@ -134,14 +134,14 @@
%0 = util.align %c0, %c64 : index
%1 = util.align %c7, %c8 : index
%2 = util.align %c9, %c8 : index
- // CHECK: return %c0, %c8, %c16
- return %0, %1, %2 : index, index, index
+ // CHECK: util.return %c0, %c8, %c16
+ util.return %0, %1, %2 : index, index, index
}
// -----
// CHECK-LABEL: @foldAffineAlign
-func.func @foldAffineAlign(%arg0: index) -> (index, index) {
+util.func public @foldAffineAlign(%arg0: index) -> (index, index) {
// CHECK: %[[A0:.+]] = affine.apply affine_map<()[s0] -> (s0 * 16384)>()[%arg0]
%a0 = affine.apply affine_map<()[s0] -> (s0 * 16384)>()[%arg0]
%c64 = arith.constant 64 : index
@@ -150,33 +150,33 @@
%b0 = affine.apply affine_map<()[s0] -> ((s0 * s0) * 4)>()[%arg0]
%c4 = arith.constant 4 : index
%b1 = util.align %b0, %c4 : index
- // CHECK: return %[[A0]], %[[B0]]
- return %a1, %b1 : index, index
+ // CHECK: util.return %[[A0]], %[[B0]]
+ util.return %a1, %b1 : index, index
}
// -----
// CHECK-LABEL: @sizeofWholeInt
-func.func @sizeofWholeInt() -> index {
+util.func public @sizeofWholeInt() -> index {
// CHECK: = arith.constant 4 : index
%0 = util.sizeof i32
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @sizeofSubByteInt
-func.func @sizeofSubByteInt() -> index {
+util.func public @sizeofSubByteInt() -> index {
// CHECK: = arith.constant 2 : index
%0 = util.sizeof i12
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @sizeofFloat
-func.func @sizeofFloat() -> index {
+util.func public @sizeofFloat() -> index {
// CHECK: = arith.constant 4 : index
%0 = util.sizeof f32
- return %0 : index
+ util.return %0 : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_ops.mlir
index 8bf52f8..d6f45cb 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/alignment_ops.mlir
@@ -1,26 +1,26 @@
// RUN: iree-opt --split-input-file %s | FileCheck %s
// CHECK-LABEL: @utilAlign
-func.func @utilAlign(%arg0 : index, %arg1: index) {
+util.func public @utilAlign(%arg0 : index, %arg1: index) {
// CHECK: = util.align %arg0, %arg1 : index
%result = util.align %arg0, %arg1 : index
- return
+ util.return
}
// -----
// CHECK-LABEL: @utilAlignInt
-func.func @utilAlignInt(%arg0 : i32, %arg1: i32) {
+util.func public @utilAlignInt(%arg0 : i32, %arg1: i32) {
// CHECK: = util.align %arg0, %arg1 : i32
%result = util.align %arg0, %arg1 : i32
- return
+ util.return
}
// -----
// CHECK-LABEL: @sizeofUnfoldable
-func.func @sizeofUnfoldable() -> index {
+util.func public @sizeofUnfoldable() -> index {
// CHECK: = util.sizeof index
%0 = util.sizeof index
- return %0 : index
+ util.return %0 : index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_folding.mlir
index a541266..d8c0261 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_folding.mlir
@@ -1,29 +1,29 @@
// RUN: iree-opt --split-input-file --canonicalize %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @foldSwitchI32Nop
-func.func @foldSwitchI32Nop(%arg0 : index) -> i32 {
+util.func public @foldSwitchI32Nop(%arg0 : index) -> i32 {
// CHECK: %[[DEFAULT:.+]] = arith.constant 5
%c5 = arith.constant 5 : i32
%0 = util.switch i32 from [] at %arg0 else %c5 : i32
- // CHECK: return %[[DEFAULT]] : i32
- return %0 : i32
+ // CHECK: util.return %[[DEFAULT]] : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @foldSwitchI32Identical
-func.func @foldSwitchI32Identical(%arg0 : index) -> i32 {
+util.func public @foldSwitchI32Identical(%arg0 : index) -> i32 {
// CHECK: %[[C100:.+]] = arith.constant 100
%c100 = arith.constant 100 : i32
%0 = util.switch i32 from [%c100, %c100, %c100] at %arg0 else %c100 : i32
- // CHECK: return %[[C100]] : i32
- return %0 : i32
+ // CHECK: util.return %[[C100]] : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @foldSwitchI32ConstantIndex
-func.func @foldSwitchI32ConstantIndex() -> (i32, i32, i32, i32) {
+util.func public @foldSwitchI32ConstantIndex() -> (i32, i32, i32, i32) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -40,42 +40,42 @@
%1 = util.switch i32 from [%c100, %c200, %c300] at %c1 else %c400 : i32
%2 = util.switch i32 from [%c100, %c200, %c300] at %c2 else %c400 : i32
%3 = util.switch i32 from [%c100, %c200, %c300] at %c3 else %c400 : i32
- // CHECK: return %[[C100]], %[[C200]], %[[C300]], %[[C400]] : i32, i32, i32, i32
- return %0, %1, %2, %3 : i32, i32, i32, i32
+ // CHECK: util.return %[[C100]], %[[C200]], %[[C300]], %[[C400]] : i32, i32, i32, i32
+ util.return %0, %1, %2, %3 : i32, i32, i32, i32
}
// -----
// CHECK-LABEL: @foldCastSameType
// CHECK-SAME: (%[[SOURCE:.+]]: !util.buffer)
-func.func @foldCastSameType(%source: !util.buffer) -> !util.buffer {
+util.func public @foldCastSameType(%source: !util.buffer) -> !util.buffer {
// CHECK-NOT: util.cast
%0 = util.cast %source : !util.buffer to !util.buffer
- // CHECK: return %[[SOURCE]]
- return %0 : !util.buffer
+ // CHECK: util.return %[[SOURCE]]
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @foldChainedCast
// CHECK-SAME: (%[[SOURCE:.+]]: !util.buffer)
-func.func @foldChainedCast(%source: !util.buffer) -> !util.buffer {
+util.func public @foldChainedCast(%source: !util.buffer) -> !util.buffer {
// CHECK-NOT: util.cast
%0 = util.cast %source : !util.buffer to !util.object
// CHECK-NOT: util.cast
%1 = util.cast %0 : !util.object to !util.buffer
- // CHECK: return %[[SOURCE]]
- return %1 : !util.buffer
+ // CHECK: util.return %[[SOURCE]]
+ util.return %1 : !util.buffer
}
// -----
// CHECK-LABEL: @foldCastIntoNullOp
-func.func @foldCastIntoNullOp() -> !util.buffer {
+util.func public @foldCastIntoNullOp() -> !util.buffer {
// CHECK: %[[NULL:.+]] = util.null : !util.buffer
%0 = util.null : !util.object
// CHECK-NOT: util.cast
%1 = util.cast %0 : !util.object to !util.buffer
- // CHECK: return %[[NULL]]
- return %1 : !util.buffer
+ // CHECK: util.return %[[NULL]]
+ util.return %1 : !util.buffer
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_ops.mlir
index 3df0fd7..9c420a7 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/assignment_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @switch
// CHECK-SAME: (%[[INDEX:.+]]: index)
-func.func @switch(%index: index) -> i32 {
+util.func public @switch(%index: index) -> i32 {
// CHECK-DAG: %[[C100:.+]] = arith.constant 100
%c100 = arith.constant 100 : i32
// CHECK-DAG: %[[C200:.+]] = arith.constant 200
@@ -13,15 +13,15 @@
%default = arith.constant 400 : i32
// CHECK: = util.switch i32 from [%[[C100]], %[[C200]], %[[C300]]] at %[[INDEX]] else %[[DEFAULT]] : i32
%0 = util.switch i32 from [%c100, %c200, %c300] at %index else %default : i32
- return %0 : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @cast
// CHECK-SAME: (%[[SOURCE:.+]]: !util.buffer)
-func.func @cast(%source: !util.buffer) -> !util.object {
+util.func public @cast(%source: !util.buffer) -> !util.object {
// CHECK: = util.cast %[[SOURCE]] : !util.buffer to !util.object
%0 = util.cast %source : !util.buffer to !util.object
- return %0 : !util.object
+ util.return %0 : !util.object
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
index 811dacb..2ee5bbb 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
@@ -1,30 +1,30 @@
// RUN: iree-opt --split-input-file --canonicalize %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @FoldSubspansIntoSliceOp
-func.func @FoldSubspansIntoSliceOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
+util.func public @FoldSubspansIntoSliceOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: %[[OFFSET:.+]] = arith.addi %arg2, %c100
%0 = util.buffer.subspan %arg0[%arg2] : !util.buffer{%arg1} -> !util.buffer{%arg3}
// CHECK: util.buffer.slice %arg0[%[[OFFSET]]] : !util.buffer{%arg1} -> !util.buffer{%c200}
%1 = util.buffer.slice %0[%c100] : !util.buffer{%arg3} -> !util.buffer{%c200}
- return %1 : !util.buffer
+ util.return %1 : !util.buffer
}
// -----
// CHECK-LABEL: @FoldBufferSubspanOp
-func.func @FoldBufferSubspanOp(%arg0: !util.buffer, %arg1: index, %arg2: index) -> !util.buffer {
+util.func public @FoldBufferSubspanOp(%arg0: !util.buffer, %arg1: index, %arg2: index) -> !util.buffer {
// CHECK-NOT: util.buffer.subspan
%0 = util.buffer.subspan %arg0[%arg1] : !util.buffer{%arg2} -> !util.buffer{%arg2}
- // CHECK: return %arg0
- return %0 : !util.buffer
+ // CHECK: util.return %arg0
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @FoldBufferSubspanOps
-func.func @FoldBufferSubspanOps(%arg0: !util.buffer, %arg1: index) -> !util.buffer {
+util.func public @FoldBufferSubspanOps(%arg0: !util.buffer, %arg1: index) -> !util.buffer {
%c100 = arith.constant 100 : index
%c300 = arith.constant 300 : index
%c400 = arith.constant 400 : index
@@ -33,14 +33,14 @@
%0 = util.buffer.subspan %arg0[%c100] : !util.buffer{%arg1} -> !util.buffer{%c500}
%1 = util.buffer.subspan %0[%c100] : !util.buffer{%c500} -> !util.buffer{%c400}
%2 = util.buffer.subspan %1[%c100] : !util.buffer{%c400} -> !util.buffer{%c300}
- // CHECK: return %[[RET]]
- return %2 : !util.buffer
+ // CHECK: util.return %[[RET]]
+ util.return %2 : !util.buffer
}
// -----
// CHECK-LABEL: @SinkSubspanAcrossSelectOps
-func.func @SinkSubspanAcrossSelectOps(%arg0: !util.buffer, %arg1: i1) -> !util.buffer {
+util.func public @SinkSubspanAcrossSelectOps(%arg0: !util.buffer, %arg1: i1) -> !util.buffer {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
@@ -51,29 +51,29 @@
// CHECK: %[[OFFSET:.+]] = arith.select %arg1, %c0, %c128 : index
%2 = arith.select %arg1, %0, %1 : !util.buffer
// CHECK-NEXT: %[[SUBSPAN:.+]] = util.buffer.subspan %arg0[%[[OFFSET]]] : !util.buffer{%c256} -> !util.buffer{%c128}
- // CHECK-NEXT: return %[[SUBSPAN]]
- return %2 : !util.buffer
+ // CHECK-NEXT: util.return %[[SUBSPAN]]
+ util.return %2 : !util.buffer
}
// -----
// CHECK-LABEL: @FoldBufferSizeOp
-func.func @FoldBufferSizeOp(%arg0: !util.buffer, %arg1: index) -> (index, i32) {
+util.func public @FoldBufferSizeOp(%arg0: !util.buffer, %arg1: index) -> (index, i32) {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
// CHECK-NOT: util.buffer.size
%0 = util.buffer.size %arg0 : !util.buffer
// CHECK: %[[LOAD:.+]] = util.buffer.load
%1 = util.buffer.load %arg0[%c0 for %c4] : !util.buffer{%arg1} -> i32
- // CHECK: return %arg1, %[[LOAD]]
- return %0, %1 : index, i32
+ // CHECK: util.return %arg1, %[[LOAD]]
+ util.return %0, %1 : index, i32
}
// -----
// CHECK-LABEL: @FoldNestedBufferSizeOp
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer)
-func.func @FoldNestedBufferSizeOp(%buffer: !util.buffer) {
+util.func public @FoldNestedBufferSizeOp(%buffer: !util.buffer) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c128 = arith.constant 128 : index
@@ -90,25 +90,25 @@
// CHECK: util.buffer.load %[[BUFFER]]{{.+}} : !util.buffer{%[[BUFFER_SIZE_OUTER]]}
%outer = util.buffer.load %buffer[%c128 for %c1] : !util.buffer{%buffer_size_outer} -> i8
util.optimization_barrier %outer : i8
- return
+ util.return
}
// -----
// CHECK-LABEL: @FoldConstantBufferSizeOp
-func.func @FoldConstantBufferSizeOp() -> index {
+util.func public @FoldConstantBufferSizeOp() -> index {
// CHECK-NOT: util.buffer.constant
%0 = util.buffer.constant : !util.buffer = dense<[1, 2, 3]> : tensor<3xi32>
// CHECK-NOT: util.buffer.size
%1 = util.buffer.size %0 : !util.buffer
- // CHECK: return %c12
- return %1 : index
+ // CHECK: util.return %c12
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @SelectBufferSizeOp
-func.func @SelectBufferSizeOp(%arg0: !util.buffer, %arg1: index, %arg2: !util.buffer, %arg3: index, %arg4: i1) -> (!util.buffer, index) {
+util.func public @SelectBufferSizeOp(%arg0: !util.buffer, %arg1: index, %arg2: !util.buffer, %arg3: index, %arg4: i1) -> (!util.buffer, index) {
%c0 = arith.constant 0 : index
// CHECK: %[[ARG0_T:.+]] = util.buffer.slice %arg0[%c0] : !util.buffer{%[[ARG0_SZ:.+]]} ->
%0 = util.buffer.slice %arg0[%c0] : !util.buffer{%arg1} -> !util.buffer{%arg1}
@@ -120,26 +120,26 @@
%3 = util.buffer.size %2 : !util.buffer
// CHECK: = util.buffer.slice %[[RET_T]][%c0] : !util.buffer{%[[RET_SIZE]]} ->
%4 = util.buffer.slice %2[%c0] : !util.buffer{%3} -> !util.buffer{%3}
- return %4, %3 : !util.buffer, index
+ util.return %4, %3 : !util.buffer, index
}
// -----
// CHECK-LABEL: @FoldSubspansIntoStorageOp
-func.func @FoldSubspansIntoStorageOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> (memref<?xi8>, index) {
+util.func public @FoldSubspansIntoStorageOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> (memref<?xi8>, index) {
// CHECK-NOT: util.buffer.subspan
%0 = util.buffer.subspan %arg0[%arg2] : !util.buffer{%arg1} -> !util.buffer{%arg3}
// CHECK: %[[STORAGE:.+]], %[[OFFSET:.+]] = util.buffer.storage %arg0 : !util.buffer{%arg1} -> (memref<?xi8>, index)
%1:2 = util.buffer.storage %0 : !util.buffer{%arg3} -> (memref<?xi8>, index)
// CHECK: %[[ADJUSTED_OFFSET:.+]] = arith.addi %arg2, %[[OFFSET]]
- // CHECK: return %[[STORAGE]], %[[ADJUSTED_OFFSET]]
- return %1#0, %1#1 : memref<?xi8>, index
+ // CHECK: util.return %[[STORAGE]], %[[ADJUSTED_OFFSET]]
+ util.return %1#0, %1#1 : memref<?xi8>, index
}
// -----
// CHECK-LABEL: @FoldSubspansIntoCopyOp
-func.func @FoldSubspansIntoCopyOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
+util.func public @FoldSubspansIntoCopyOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
%c1 = arith.constant 1 : index
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -149,13 +149,13 @@
%1 = util.buffer.subspan %arg0[%arg4] : !util.buffer{%arg1} -> !util.buffer{%arg5}
// CHECK: util.buffer.copy %arg0[%[[OFFSET_SRC]]], %arg0[%[[OFFSET_DST]]], %c1 : !util.buffer{%arg1} -> !util.buffer{%arg1}
util.buffer.copy %0[%c100], %1[%c200], %c1 : !util.buffer{%arg3} -> !util.buffer{%arg5}
- return
+ util.return
}
// -----
// CHECK-LABEL: @FoldSubspansIntoCompareOp
-func.func @FoldSubspansIntoCompareOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> i1 {
+util.func public @FoldSubspansIntoCompareOp(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> i1 {
%c1 = arith.constant 1 : index
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -165,26 +165,26 @@
%1 = util.buffer.subspan %arg0[%arg4] : !util.buffer{%arg1} -> !util.buffer{%arg5}
// CHECK: = util.buffer.compare %arg0[%[[OFFSET_LHS]]], %arg0[%[[OFFSET_RHS]]], %c1 : !util.buffer{%arg1}, !util.buffer{%arg1}
%2 = util.buffer.compare %0[%c100], %1[%c200], %c1 : !util.buffer{%arg3}, !util.buffer{%arg5}
- return %2 : i1
+ util.return %2 : i1
}
// -----
// CHECK-LABEL: @FoldSubspansIntoFillOp
-func.func @FoldSubspansIntoFillOp(%arg0: !util.buffer, %arg1: index, %arg2: i32, %arg3: index, %arg4: index) {
+util.func public @FoldSubspansIntoFillOp(%arg0: !util.buffer, %arg1: index, %arg2: i32, %arg3: index, %arg4: index) {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: %[[OFFSET:.+]] = arith.addi %arg3, %c100
%0 = util.buffer.subspan %arg0[%arg3] : !util.buffer{%arg1} -> !util.buffer{%arg4}
// CHECK: util.buffer.fill %arg2, %arg0[%[[OFFSET]] for %c200] : i32 -> !util.buffer{%arg1}
util.buffer.fill %arg2, %0[%c100 for %c200] : i32 -> !util.buffer{%arg4}
- return
+ util.return
}
// -----
// CHECK-LABEL: @FoldSubspanIntoLoadOp
-func.func @FoldSubspanIntoLoadOp(%arg0: !util.buffer, %arg1: index) -> i32 {
+util.func public @FoldSubspanIntoLoadOp(%arg0: !util.buffer, %arg1: index) -> i32 {
%c4 = arith.constant 4 : index
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
@@ -193,13 +193,13 @@
%0 = util.buffer.subspan %arg0[%c128] : !util.buffer{%arg1} -> !util.buffer{%c256}
// CHECK: = util.buffer.load %arg0[%c192 for %c4] : !util.buffer{%arg1} -> i32
%1 = util.buffer.load %0[%c64 for %c4] : !util.buffer{%c256} -> i32
- return %1 : i32
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @FoldSubspanIntoStoreOp
-func.func @FoldSubspanIntoStoreOp(%arg0: !util.buffer, %arg1: index) {
+util.func public @FoldSubspanIntoStoreOp(%arg0: !util.buffer, %arg1: index) {
%c4 = arith.constant 4 : index
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
@@ -209,5 +209,5 @@
%0 = util.buffer.subspan %arg0[%c128] : !util.buffer{%arg1} -> !util.buffer{%c256}
// CHECK: util.buffer.store %c123_i32, %arg0[%c192 for %c4] : i32 -> !util.buffer{%arg1}
util.buffer.store %c123_i32, %0[%c64 for %c4] : i32 -> !util.buffer{%c256}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_ops.mlir
index 736ae48..947e2e0 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_ops.mlir
@@ -1,139 +1,139 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @buffer_constant
-func.func @buffer_constant() -> !util.buffer {
+util.func public @buffer_constant() -> !util.buffer {
// CHECK: = util.buffer.constant : !util.buffer = dense<[1, 2, 3]> : tensor<3xi32>
%0 = util.buffer.constant : !util.buffer = dense<[1, 2, 3]> : tensor<3xi32>
- return %0 : !util.buffer
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @buffer_constant_string
-func.func @buffer_constant_string() -> !util.buffer {
+util.func public @buffer_constant_string() -> !util.buffer {
// CHECK: = util.buffer.constant : !util.buffer = "hello"
%0 = util.buffer.constant : !util.buffer = "hello"
- return %0 : !util.buffer
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @buffer_alloc
-func.func @buffer_alloc(%arg0: index) -> !util.buffer {
+util.func public @buffer_alloc(%arg0: index) -> !util.buffer {
// CHECK: = util.buffer.alloc uninitialized {alignment = 16 : index} : !util.buffer{%arg0}
%0 = util.buffer.alloc uninitialized {alignment = 16 : index} : !util.buffer{%arg0}
- return %0 : !util.buffer
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @buffer_dealloc
-func.func @buffer_dealloc(%arg0: !util.buffer, %arg1: index) {
+util.func public @buffer_dealloc(%arg0: !util.buffer, %arg1: index) {
// CHECK: util.buffer.dealloc %arg0 : !util.buffer{%arg1}
util.buffer.dealloc %arg0 : !util.buffer{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_slice
-func.func @buffer_slice(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
+util.func public @buffer_slice(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
// CHECK: = util.buffer.slice %arg0[%arg1] : !util.buffer{%arg2} -> !util.buffer{%arg3}
%0 = util.buffer.slice %arg0[%arg1] : !util.buffer{%arg2} -> !util.buffer{%arg3}
- return %0 : !util.buffer
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @buffer_subspan
-func.func @buffer_subspan(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
+util.func public @buffer_subspan(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: index) -> !util.buffer {
// CHECK: = util.buffer.subspan %arg0[%arg1] : !util.buffer{%arg2} -> !util.buffer{%arg3}
%0 = util.buffer.subspan %arg0[%arg1] : !util.buffer{%arg2} -> !util.buffer{%arg3}
- return %0 : !util.buffer
+ util.return %0 : !util.buffer
}
// -----
// CHECK-LABEL: @buffer_size
-func.func @buffer_size(%arg0: !util.buffer) -> index {
+util.func public @buffer_size(%arg0: !util.buffer) -> index {
// CHECK: = util.buffer.size %arg0 : !util.buffer
%0 = util.buffer.size %arg0 : !util.buffer
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @buffer_storage
-func.func @buffer_storage(%arg0: !util.buffer, %arg1: index) -> (memref<?xi8>, index) {
+util.func public @buffer_storage(%arg0: !util.buffer, %arg1: index) -> (memref<?xi8>, index) {
// CHECK: = util.buffer.storage %arg0 : !util.buffer{%arg1} -> (memref<?xi8>, index)
%0, %1 = util.buffer.storage %arg0 : !util.buffer{%arg1} -> (memref<?xi8>, index)
- return %0, %1 : memref<?xi8>, index
+ util.return %0, %1 : memref<?xi8>, index
}
// -----
// CHECK-LABEL: @buffer_copy
-func.func @buffer_copy(%arg0: !util.buffer, %arg1: index) {
+util.func public @buffer_copy(%arg0: !util.buffer, %arg1: index) {
%c1 = arith.constant 1 : index
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: util.buffer.copy %arg0[%c100], %arg0[%c200], %c1 : !util.buffer{%arg1} -> !util.buffer{%arg1}
util.buffer.copy %arg0[%c100], %arg0[%c200], %c1 : !util.buffer{%arg1} -> !util.buffer{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_compare
-func.func @buffer_compare(%arg0: !util.buffer, %arg1: index) -> i1 {
+util.func public @buffer_compare(%arg0: !util.buffer, %arg1: index) -> i1 {
%c1 = arith.constant 1 : index
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: = util.buffer.compare %arg0[%c100], %arg0[%c200], %c1 : !util.buffer{%arg1}, !util.buffer{%arg1}
%0 = util.buffer.compare %arg0[%c100], %arg0[%c200], %c1 : !util.buffer{%arg1}, !util.buffer{%arg1}
- return %0 : i1
+ util.return %0 : i1
}
// -----
// CHECK-LABEL: @buffer_fill
-func.func @buffer_fill(%arg0: !util.buffer, %arg1: index, %arg2: i32) {
+util.func public @buffer_fill(%arg0: !util.buffer, %arg1: index, %arg2: i32) {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
// CHECK: util.buffer.fill %arg2, %arg0[%c100 for %c200] : i32 -> !util.buffer{%arg1}
util.buffer.fill %arg2, %arg0[%c100 for %c200] : i32 -> !util.buffer{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_load
-func.func @buffer_load(%arg0: !util.buffer, %arg1: index) -> i32 {
+util.func public @buffer_load(%arg0: !util.buffer, %arg1: index) -> i32 {
%c4 = arith.constant 4 : index
%c100 = arith.constant 100 : index
// CHECK: = util.buffer.load %arg0[%c100 for %c4] : !util.buffer{%arg1} -> i32
%0 = util.buffer.load %arg0[%c100 for %c4] : !util.buffer{%arg1} -> i32
- return %0 : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @buffer_store
-func.func @buffer_store(%arg0: !util.buffer, %arg1: index, %arg2: i32) {
+util.func public @buffer_store(%arg0: !util.buffer, %arg1: index, %arg2: i32) {
%c4 = arith.constant 4 : index
%c100 = arith.constant 100 : index
// CHECK: util.buffer.store %arg2, %arg0[%c100 for %c4] : i32 -> !util.buffer{%arg1}
util.buffer.store %arg2, %arg0[%c100 for %c4] : i32 -> !util.buffer{%arg1}
- return
+ util.return
}
// -----
// CHECK-LABEL: @buffer_hash
-func.func @buffer_hash(%arg0: !util.buffer, %arg1: index) -> i64 {
+util.func public @buffer_hash(%arg0: !util.buffer, %arg1: index) -> i64 {
%c17 = arith.constant 17 : index
%c100 = arith.constant 100 : index
// CHECK: = util.buffer.hash %arg0[%c100 for %c17] : !util.buffer{%arg1} -> i64
%0 = util.buffer.hash %arg0[%c100 for %c17] : !util.buffer{%arg1} -> i64
- return %0 : i64
+ util.return %0 : i64
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/global_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/global_folding.mlir
index fc47404..669a2f5 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/global_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/global_folding.mlir
@@ -13,51 +13,51 @@
util.global private @v_unused : tensor<4xi32>
// CHECK-LABEL: @unused_load
-func.func @unused_load() {
- // CHECK-NEXT: return
+util.func public @unused_load() {
+ // CHECK-NEXT: util.return
%0 = util.global.load @v_unused : tensor<4xi32>
- return
+ util.return
}
// -----
util.global private @v_const {inlining_policy = #util.inline.never} = dense<1.0> : tensor<8xf32>
// CHECK-LABEL: @no_fold_noinline_immutable_const
-func.func @no_fold_noinline_immutable_const() -> tensor<8xf32> {
+util.func public @no_fold_noinline_immutable_const() -> tensor<8xf32> {
// CHECK-NEXT: = util.global.load @v_const : tensor<8xf32>
%0 = util.global.load @v_const : tensor<8xf32>
- return %0 : tensor<8xf32>
+ util.return %0 : tensor<8xf32>
}
// -----
util.global private mutable @v_nop : tensor<4xi32>
// CHECK-LABEL: @nop_load_store
-func.func @nop_load_store() {
- // CHECK-NEXT: return
+util.func public @nop_load_store() {
+ // CHECK-NEXT: util.return
%0 = util.global.load @v_nop : tensor<4xi32>
util.global.store %0, @v_nop : tensor<4xi32>
- return
+ util.return
}
// -----
util.global private @v : tensor<4xf32>
// CHECK-LABEL: @fold_load_indirect
-func.func @fold_load_indirect() -> tensor<4xf32> {
+util.func public @fold_load_indirect() -> tensor<4xf32> {
%0 = util.global.address @v : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: = util.global.load @v
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
- return %1 : tensor<4xf32>
+ util.return %1 : tensor<4xf32>
}
// -----
util.global private mutable @v : tensor<4xf32>
// CHECK-LABEL: @fold_store_indirect
-func.func @fold_store_indirect(%arg0 : tensor<4xf32>) {
+util.func public @fold_store_indirect(%arg0 : tensor<4xf32>) {
%0 = util.global.address @v : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: util.global.store %arg0, @v
util.global.store.indirect %arg0, %0 : tensor<4xf32> -> !util.ptr<tensor<4xf32>>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/global_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/global_ops.mlir
index b19b1b0..d5ff734 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/global_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/global_ops.mlir
@@ -43,34 +43,34 @@
util.global private @v_loaded : tensor<4xi32>
// CHECK-LABEL: @loaded
-func.func @loaded() {
+util.func public @loaded() {
// CHECK-NEXT: = util.global.load @v_loaded : tensor<4xi32>
%0 = util.global.load @v_loaded : tensor<4xi32>
- return
+ util.return
}
// -----
util.global private mutable @v_stored : tensor<4xi32>
// CHECK-LABEL: @stored
-func.func @stored() {
+util.func public @stored() {
// CHECK-NEXT: %[[VAL:.+]] = arith.constant
%cst = arith.constant dense<5> : tensor<4xi32>
// CHECK-NEXT: util.global.store %[[VAL]], @v_stored : tensor<4xi32>
util.global.store %cst, @v_stored : tensor<4xi32>
- return
+ util.return
}
// -----
util.global private @v_loaded : tensor<4xf32>
// CHECK-LABEL: @loaded_indirect
-func.func @loaded_indirect() {
+util.func public @loaded_indirect() {
// CHECK-NEXT: %[[ADDR:.+]] = util.global.address @v_loaded
%0 = util.global.address @v_loaded : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: = util.global.load.indirect %[[ADDR]]
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
- return
+ util.return
}
// -----
@@ -78,10 +78,10 @@
util.global private mutable @v_stored : tensor<4xf32>
// CHECK-LABEL: @stored_indirect
// CHECK-SAME: (%[[VALUE:.+]]: tensor<4xf32>)
-func.func @stored_indirect(%arg0: tensor<4xf32>) {
+util.func public @stored_indirect(%arg0: tensor<4xf32>) {
// CHECK-NEXT: %[[ADDR:.+]] = util.global.address @v_stored
%0 = util.global.address @v_stored : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: util.global.store.indirect %[[VALUE]], %[[ADDR]]
util.global.store.indirect %arg0, %0 : tensor<4xf32> -> !util.ptr<tensor<4xf32>>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
index e78803f..a1bc0b0 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
@@ -1,63 +1,63 @@
// RUN: iree-opt --verify-diagnostics --canonicalize --split-input-file %s | FileCheck %s
// CHECK-LABEL: @no_fold_constant
-func.func @no_fold_constant() -> (i32) {
+util.func public @no_fold_constant() -> (i32) {
// CHECK: constant 1 : i32
%0 = arith.constant 1 : i32
// CHECK: util.optimization_barrier
%1 = "util.optimization_barrier"(%0) : (i32) -> i32
- return %1 : i32
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @no_fold_add
-func.func @no_fold_add() -> (i32) {
+util.func public @no_fold_add() -> (i32) {
// CHECK-NEXT: %[[C1:.+]] = vm.const.i32 1
%c1 = vm.const.i32 1
// CHECK-NEXT: %[[R1:.+]] = util.optimization_barrier %[[C1]]
%0 = util.optimization_barrier %c1 : i32
// CHECK-NEXT: %[[R2:.+]] = vm.add.i32 %[[R1]], %[[R1]]
%1 = vm.add.i32 %0, %0 : i32
- // CHECK-NEXT: return %[[R2]]
- return %1 : i32
+ // CHECK-NEXT: util.return %[[R2]]
+ util.return %1 : i32
}
// -----
// Exists to check that the above succeeds when there's no barrier.
// CHECK-LABEL: @fold_add
-func.func @fold_add() -> (i32) {
+util.func public @fold_add() -> (i32) {
// CHECK-NEXT: %[[C2:.+]] = vm.const.i32 2
- // CHECK-NEXT: return %[[C2]]
+ // CHECK-NEXT: util.return %[[C2]]
%c1 = vm.const.i32 1
%0 = vm.add.i32 %c1, %c1 : i32
- return %0 : i32
+ util.return %0 : i32
}
// -----
-func.func @result_operand_count_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
+util.func public @result_operand_count_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
// expected-error@+1 {{must have same number of operands and results}}
%1 = "util.optimization_barrier"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- return
+ util.return
}
// -----
-func.func @result_operand_type_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
+util.func public @result_operand_type_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
// expected-error@+1 {{must have same operand and result types, but they differ at index 1}}
%1:2 = "util.optimization_barrier"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> (tensor<i32>, memref<i32>)
- return
+ util.return
}
// -----
// CHECK-LABEL: @canonicalize_unfoldable_constant
-func.func @canonicalize_unfoldable_constant() -> i32 {
+util.func public @canonicalize_unfoldable_constant() -> i32 {
// CHECK-NEXT: %[[C:.+]] = arith.constant 42 : i32
// CHECK-NEXT: %[[R:.+]] = util.optimization_barrier %[[C]] : i32
%c42 = util.unfoldable_constant 42 : i32
- // CHECK-NEXT: return %[[R]]
- return %c42 : i32
+ // CHECK-NEXT: util.return %[[R]]
+ util.return %c42 : i32
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
index d8e79c5..8d21da5 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
@@ -3,7 +3,7 @@
// CHECK-LABEL: @parse_print_barrier
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9$._-]+]]
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9$._-]+]]
-func.func @parse_print_barrier(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
+util.func public @parse_print_barrier(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
// CHECK-NEXT: util.optimization_barrier %[[ARG0]] : tensor<i32>
%1 = util.optimization_barrier %arg0 : tensor<i32>
@@ -13,13 +13,13 @@
// CHECK-NEXT: util.optimization_barrier {some_unit} %[[ARG0]] : tensor<i32>
%has_attr = util.optimization_barrier {some_unit} %arg0 : tensor<i32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @parse_print_unfoldable_constant
-func.func @parse_print_unfoldable_constant(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
+util.func public @parse_print_unfoldable_constant(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
// CHECK-NEXT: util.unfoldable_constant 42
%c42 = util.unfoldable_constant 42 : i32
@@ -29,5 +29,5 @@
// CHECK: util.unfoldable_constant @func_with_args : (f32) -> ()
%csymref = util.unfoldable_constant @func_with_args : (f32) -> ()
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/list_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/list_ops.mlir
index 641e0cd..43eb04a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/list_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/list_ops.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @list_init_ops
-func.func @list_init_ops() {
+util.func public @list_init_ops() {
// CHECK: %[[CAPACITY:.+]] = arith.constant 5
%capacity = arith.constant 5 : index
// CHECK: = util.list.create %[[CAPACITY]] : !util.list<?>
@@ -15,14 +15,14 @@
// CHECK: util.list.resize %[[LIST]], %[[NEW_SIZE]] : !util.list<?>
util.list.resize %list, %new_size : !util.list<?>
- return
+ util.return
}
// -----
// CHECK-LABEL: @list_access
// CHECK-SAME: (%[[LIST:.+]]: !util.list<i32>)
-func.func @list_access(%list: !util.list<i32>) {
+util.func public @list_access(%list: !util.list<i32>) {
%c10 = arith.constant 10 : index
// CHECK: = util.list.get %[[LIST]][%c10] : !util.list<i32>
@@ -35,14 +35,14 @@
// CHECK: util.list.set %[[LIST]][%c10], %[[NEW_VALUE]] : !util.list<i32>
util.list.set %list[%c10], %new_value : !util.list<i32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @list_access_tensor
// CHECK-SAME: (%[[LIST:.+]]: !util.list<tensor<*xf32>>)
-func.func @list_access_tensor(%list: !util.list<tensor<*xf32>>) {
+util.func public @list_access_tensor(%list: !util.list<tensor<*xf32>>) {
%c10 = arith.constant 10 : index
// CHECK: = util.list.get %[[LIST]][%c10] : !util.list<tensor<*xf32>> -> tensor<?xf32>
@@ -53,14 +53,14 @@
// CHECK: util.list.set %[[LIST]][%c10], %[[NEW_VALUE]] : tensor<5xi32> -> !util.list<tensor<*xf32>>
util.list.set %list[%c10], %new_value : tensor<5xi32> -> !util.list<tensor<*xf32>>
- return
+ util.return
}
// -----
// CHECK-LABEL: @list_access_variant
// CHECK-SAME: (%[[LIST:.+]]: !util.list<?>)
-func.func @list_access_variant(%list: !util.list<?>) {
+util.func public @list_access_variant(%list: !util.list<?>) {
%c10 = arith.constant 10 : index
%c11 = arith.constant 11 : index
@@ -80,5 +80,5 @@
// CHECK: util.list.set %[[LIST]][%c11], %[[NEW_TENSOR_VALUE]] : tensor<5xi32> -> !util.list<?>
util.list.set %list[%c11], %new_tensor_value : tensor<5xi32> -> !util.list<?>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/numeric_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/numeric_ops.mlir
index 3e7ca7a..e366196 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/numeric_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/numeric_ops.mlir
@@ -1,19 +1,19 @@
// RUN: iree-opt --split-input-file %s | FileCheck %s
-func.func @optional_convert_scalar(%arg0 : i32) -> i32 {
+util.func public @optional_convert_scalar(%arg0 : i32) -> i32 {
// CHECK: util.numeric.optional_narrow %arg0 : i32 as si8
%0 = util.numeric.optional_narrow %arg0 : i32 as si8
- return %0 : i32
+ util.return %0 : i32
}
-func.func @optional_convert_tensor(%arg0 : tensor<f32>) -> tensor<f32> {
+util.func public @optional_convert_tensor(%arg0 : tensor<f32>) -> tensor<f32> {
// CHECK: util.numeric.optional_narrow %arg0 : tensor<f32> as si8
%0 = util.numeric.optional_narrow %arg0 : tensor<f32> as si8
- return %0 : tensor<f32>
+ util.return %0 : tensor<f32>
}
-func.func @optional_convert_zero(%arg0 : i32) -> i32 {
+util.func public @optional_convert_zero(%arg0 : i32) -> i32 {
// CHECK: util.numeric.optional_narrow %arg0 : i32 as ui0
%0 = util.numeric.optional_narrow %arg0 : i32 as ui0
- return %0 : i32
+ util.return %0 : i32
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/range_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/range_folding.mlir
index 5bc4184..5908d2b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/range_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/range_folding.mlir
@@ -3,7 +3,7 @@
// NOTE: util.range.min and util.range.max share their code so we just test min.
// CHECK-LABEL: @rangeMinConstant
-func.func @rangeMinConstant() -> (index, index) {
+util.func public @rangeMinConstant() -> (index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -12,24 +12,24 @@
%0 = util.range.min %c0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
%1 = util.range.min %c3, %c1, %c2 : index
- // CHECK: return %[[C0]], %[[C1]]
- return %0, %1 : index, index
+ // CHECK: util.return %[[C0]], %[[C1]]
+ util.return %0, %1 : index, index
}
// -----
// CHECK-LABEL: @rangeMinExpand
-func.func @rangeMinExpand(%arg0: index, %arg1: index) -> index {
+util.func public @rangeMinExpand(%arg0: index, %arg1: index) -> index {
// CHECK: %[[MIN:.+]] = arith.minui %arg0, %arg1 : index
%0 = util.range.min %arg0, %arg1 : index
- // CHECK: return %[[MIN]]
- return %0 : index
+ // CHECK: util.return %[[MIN]]
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @rangeMinSimplify
-func.func @rangeMinSimplify(%arg0: index, %arg1: index) -> (index, index) {
+util.func public @rangeMinSimplify(%arg0: index, %arg1: index) -> (index, index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -38,26 +38,26 @@
%0 = util.range.min %arg0, %c0, %arg0, %arg1 : index
// CHECK: %[[MIN1:.+]] = util.range.min %arg0, %arg1, %c1 : index
%1 = util.range.min %c3, %arg0, %c1, %arg1, %c2, %arg1 : index
- // CHECK: return %[[MIN0]], %[[MIN1]]
- return %0, %1 : index, index
+ // CHECK: util.return %[[MIN0]], %[[MIN1]]
+ util.return %0, %1 : index, index
}
// -----
// CHECK-LABEL: @rangeExtentsFoldConstants
-func.func @rangeExtentsFoldConstants() -> (index, index) {
+util.func public @rangeExtentsFoldConstants() -> (index, index) {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%0:2 = util.range.extents [%c1 for %c2], [%c2 for %c3] : index
- // CHECK: return %c1, %c4
- return %0#0, %0#1 : index, index
+ // CHECK: util.return %c1, %c4
+ util.return %0#0, %0#1 : index, index
}
// -----
// CHECK-LABEL: @rangeExtentsFoldConstantsDynamic
-func.func @rangeExtentsFoldConstantsDynamic(%arg0: index, %arg1: index) -> (index, index) {
+util.func public @rangeExtentsFoldConstantsDynamic(%arg0: index, %arg1: index) -> (index, index) {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
@@ -66,25 +66,25 @@
// CHECK: %[[RANGE_MIN:.+]] = arith.minui %arg0, %c1
// CHECK: %[[RANGE_MAX:.+]] = arith.maxui %[[RANGE_MAX_INC]], %c4
%0:2 = util.range.extents [%c1 for %c2], [%arg0 for %arg1], [%c2 for %c3] : index
- // CHECK: return %[[RANGE_MIN]], %[[RANGE_MAX]]
- return %0#0, %0#1 : index, index
+ // CHECK: util.return %[[RANGE_MIN]], %[[RANGE_MAX]]
+ util.return %0#0, %0#1 : index, index
}
// -----
// CHECK-LABEL: @rangeExtentsExpand1
-func.func @rangeExtentsExpand1(%arg0: index, %arg1: index) -> (index, index) {
+util.func public @rangeExtentsExpand1(%arg0: index, %arg1: index) -> (index, index) {
// CHECK: %[[RANGE_MAX_EXC:.+]] = arith.addi %arg0, %arg1
// CHECK: %[[RANGE_MAX_INC:.+]] = arith.subi %[[RANGE_MAX_EXC]], %c1
%0:2 = util.range.extents [%arg0 for %arg1] : index
- // CHECK: return %arg0, %[[RANGE_MAX_INC]]
- return %0#0, %0#1 : index, index
+ // CHECK: util.return %arg0, %[[RANGE_MAX_INC]]
+ util.return %0#0, %0#1 : index, index
}
// -----
// CHECK-LABEL: @rangeExtentsExpand2
-func.func @rangeExtentsExpand2(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> (index, index) {
+util.func public @rangeExtentsExpand2(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> (index, index) {
// CHECK: %[[RANGE_MIN:.+]] = arith.minui %arg0, %arg2
// CHECK: %[[RANGE0_MAX_EXC:.+]] = arith.addi %arg0, %arg1
// CHECK: %[[RANGE0_MAX_INC:.+]] = arith.subi %[[RANGE0_MAX_EXC]], %c1
@@ -92,15 +92,15 @@
// CHECK: %[[RANGE1_MAX_INC:.+]] = arith.subi %[[RANGE1_MAX_EXC]], %c1
// CHECK: %[[RANGE_MAX:.+]] = arith.maxui %[[RANGE0_MAX_INC]], %[[RANGE1_MAX_INC]]
%0:2 = util.range.extents [%arg0 for %arg1], [%arg2 for %arg3] : index
- // CHECK: return %[[RANGE_MIN]], %[[RANGE_MAX]]
- return %0#0, %0#1 : index, index
+ // CHECK: util.return %[[RANGE_MIN]], %[[RANGE_MAX]]
+ util.return %0#0, %0#1 : index, index
}
// -----
// CHECK-LABEL: @rangeExtentsDeduplicate
-func.func @rangeExtentsDeduplicate(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> (index, index) {
+util.func public @rangeExtentsDeduplicate(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) -> (index, index) {
// CHECK: = util.range.extents [%arg0 for %arg1], [%arg2 for %arg3], [%arg4 for %arg5] : index
%0:2 = util.range.extents [%arg0 for %arg1], [%arg2 for %arg3], [%arg0 for %arg1], [%arg4 for %arg5] : index
- return %0#0, %0#1 : index, index
+ util.return %0#0, %0#1 : index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/range_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/range_ops.mlir
index db297c7..c1b8209 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/range_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/range_ops.mlir
@@ -1,32 +1,32 @@
// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @rangeMin
-func.func @rangeMin(%arg0: index, %arg1: index, %arg2: index) {
+util.func public @rangeMin(%arg0: index, %arg1: index, %arg2: index) {
// CHECK: = util.range.min %arg0 : index
%0 = util.range.min %arg0 : index
// CHECK: = util.range.min %arg0, %arg1, %arg2 : index
%1 = util.range.min %arg0, %arg1, %arg2 : index
- return
+ util.return
}
// -----
// CHECK-LABEL: @rangeMax
-func.func @rangeMax(%arg0: index, %arg1: index, %arg2: index) {
+util.func public @rangeMax(%arg0: index, %arg1: index, %arg2: index) {
// CHECK: = util.range.max %arg0 : index
%0 = util.range.max %arg0 : index
// CHECK: = util.range.max %arg0, %arg1, %arg2 : index
%1 = util.range.max %arg0, %arg1, %arg2 : index
- return
+ util.return
}
// -----
// CHECK-LABEL: @rangeExtents
-func.func @rangeExtents(%arg0: index, %arg1: index, %arg2: index) {
+util.func public @rangeExtents(%arg0: index, %arg1: index, %arg2: index) {
// CHECK: = util.range.extents [%arg0 for %arg2] : index
%0:2 = util.range.extents [%arg0 for %arg2] : index
// CHECK: = util.range.extents [%arg0 for %arg2], [%arg1 for %arg2] : index
%1:2 = util.range.extents [%arg0 for %arg2], [%arg1 for %arg2] : index
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/structural_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/structural_ops.mlir
index 12f7b6c..8624d84 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/structural_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/structural_ops.mlir
@@ -95,7 +95,7 @@
util.func @basicCall(%arg0: tensor<?xf32>) -> (tensor<?xf32>, i32) {
// CHECK: %[[CALL:.+]]:2 = util.call @basicExtern(%[[ARG0]]) : (tensor<?xf32>) -> (tensor<?xf32>, i32)
%call:2 = util.call @basicExtern(%arg0) : (tensor<?xf32>) -> (tensor<?xf32>, i32)
- // CHECK: return %[[CALL]]#0, %[[CALL]]#1
+ // CHECK: util.return %[[CALL]]#0, %[[CALL]]#1
util.return %call#0, %call#1 : tensor<?xf32>, i32
}
@@ -109,7 +109,7 @@
util.func @inplaceCall(%arg0: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[CALL:.+]] = util.call @inplaceExtern(%[[ARG0]]) : (tensor<?xf32>) -> %[[ARG0]]
%call = util.call @inplaceExtern(%arg0) : (tensor<?xf32>) -> %arg0
- // CHECK: return %[[CALL]]
+ // CHECK: util.return %[[CALL]]
util.return %call : tensor<?xf32>
}
@@ -123,6 +123,6 @@
util.func public @inplaceTypeChangeCall(%arg0: tensor<?x4xf32>) -> tensor<4x?xi32> {
// CHECK: %[[CALL:.+]] = util.call @inplaceTypeChangeExtern(%[[ARG0]]) : (tensor<?x4xf32>) -> %[[ARG0]] as tensor<4x?xi32>
%call = util.call @inplaceTypeChangeExtern(%arg0) : (tensor<?x4xf32>) -> %arg0 as tensor<4x?xi32>
- // CHECK: return %[[CALL]]
+ // CHECK: util.return %[[CALL]]
util.return %call : tensor<4x?xi32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
index 8a1123e..5b5300b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/IPO.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
@@ -46,9 +45,9 @@
// callees for example.
struct FuncAnalysis {
// Function under analysis.
- func::FuncOp funcOp;
+ IREE::Util::FuncOp funcOp;
// All call sites across the whole program.
- SmallVector<func::CallOp> callOps;
+ SmallVector<IREE::Util::CallOp> callOps;
// Whether this function may be accessed indirectly or used externally.
// This generally disables optimizations.
@@ -131,13 +130,15 @@
};
// Note that the analysis results may be incomplete.
-static FuncAnalysis analyzeFuncOp(func::FuncOp funcOp, Explorer &explorer) {
+static FuncAnalysis analyzeFuncOp(IREE::Util::FuncOp funcOp,
+ Explorer &explorer) {
// Gather callers from across the program.
FuncAnalysis analysis;
analysis.funcOp = funcOp;
analysis.isIncomplete = funcOp.isPublic() || funcOp.isExternal();
if (explorer.walkIncomingCalls(funcOp, [&](mlir::CallOpInterface callOp) {
- if (auto funcCallOp = dyn_cast<func::CallOp>((Operation *)callOp)) {
+ if (auto funcCallOp =
+ dyn_cast<IREE::Util::CallOp>((Operation *)callOp)) {
analysis.callOps.push_back(funcCallOp);
} else {
analysis.isIncomplete = true;
@@ -147,6 +148,11 @@
analysis.isIncomplete = true;
}
+ // TODO(benvanik): support functions with tied operands.
+ if (funcOp.hasAnyTiedOperands()) {
+ analysis.isIncomplete = true;
+ }
+
// Presize data types so we can index them freely below.
unsigned argCount = funcOp.getNumArguments();
unsigned resultCount = funcOp.getNumResults();
@@ -168,7 +174,7 @@
// Walk all return sites in the function.
SmallVector<Value> seenResultValues(resultCount);
- funcOp.walk([&](func::ReturnOp returnOp) {
+ funcOp.walk([&](IREE::Util::ReturnOp returnOp) {
for (auto [i, value] : llvm::enumerate(returnOp.getOperands())) {
// Check to see if the value returned is a constant and stash.
// We'll only use this value if all return sites are uniform.
@@ -332,7 +338,7 @@
auto arg = funcOp.getArgument(argIndex);
bool onlyReturnUsers = true;
for (auto user : arg.getUsers()) {
- if (!isa<func::ReturnOp>(user)) {
+ if (!isa<IREE::Util::ReturnOp>(user)) {
onlyReturnUsers = false;
break;
}
@@ -400,7 +406,8 @@
}
// Returns true if any changes were made.
-static bool applyFuncChanges(FuncAnalysis &analysis, func::FuncOp funcOp) {
+static bool applyFuncChanges(FuncAnalysis &analysis,
+ IREE::Util::FuncOp funcOp) {
// Build the new set of function arguments and inline uniform constants.
auto builder = OpBuilder::atBlockBegin(&funcOp.getBlocks().front());
auto oldArgTypes = llvm::to_vector(funcOp.getArgumentTypes());
@@ -464,7 +471,7 @@
return false;
// Erase dead results from all return sites.
- funcOp.walk([&](func::ReturnOp returnOp) {
+ funcOp.walk([&](IREE::Util::ReturnOp returnOp) {
for (int i = deadResults.size() - 1; i >= 0; --i) {
if (deadResults.test(i))
returnOp.getOperandsMutable().erase(i);
@@ -481,7 +488,8 @@
}
// Returns true if any changes were made.
-static bool applyCallChanges(FuncAnalysis &analysis, func::CallOp callOp) {
+static bool applyCallChanges(FuncAnalysis &analysis,
+ IREE::Util::CallOp callOp) {
// Build the new set of call operands.
SmallVector<Value> oldOperands = callOp.getOperands();
SmallVector<Value> newOperands;
@@ -551,8 +559,10 @@
return false;
// Fully replace call op because we may have changed result count.
- auto newCallOp = OpBuilder(callOp).create<func::CallOp>(
- callOp.getLoc(), callOp.getCalleeAttr(), newResultTypes, newOperands);
+ // TODO(benvanik): update tied operands.
+ auto newCallOp = OpBuilder(callOp).create<IREE::Util::CallOp>(
+ callOp.getLoc(), newResultTypes, callOp.getCalleeAttr(), newOperands,
+ /*tied_operands=*/ArrayAttr{});
newCallOp->setDialectAttrs(callOp->getDialectAttrs());
// Remap live old results -> new results.
@@ -589,7 +599,7 @@
// across the whole program we can't perform any mutations during this
// analysis.
std::vector<FuncAnalysis> analysisResults;
- for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
+ for (auto funcOp : moduleOp.getOps<IREE::Util::FuncOp>()) {
analysisResults.push_back(analyzeFuncOp(funcOp, explorer));
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
index b5d0a20..f8d1cbf 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/PropagateSubranges.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -54,7 +53,8 @@
// already exist offset globals as duplicates will get added and we'll need to
// rely on global fusion to get rid of them. Note that this only expands globals
// and does not yet update use sites - we just need the ops to reference.
-static ExpandedGlobalMap expandResourceGlobals(Operation *rootOp) {
+static ExpandedGlobalMap expandResourceGlobals(Operation *rootOp,
+ SymbolTable &symbolTable) {
ExpandedGlobalMap expandedGlobals;
// Gather all of the resource globals in the root.
@@ -67,7 +67,6 @@
}
// Expand each global by adding the offset right next to it.
- SymbolTable symbolTable(rootOp);
auto indexType = IndexType::get(rootOp->getContext());
for (auto &it : expandedGlobals) {
auto &global = it.second;
@@ -112,21 +111,25 @@
llvm::any_of(op->getResultTypes(), isResourceType);
}
+static void expandType(Type type, SmallVectorImpl<Type> &newTypes) {
+ newTypes.push_back(type);
+ if (isResourceType(type)) {
+ auto indexType = IndexType::get(type.getContext());
+ newTypes.push_back(indexType); // resource size
+ newTypes.push_back(indexType); // subrange offset
+ newTypes.push_back(indexType); // subrange length
+ }
+}
+
// Expands resources in the given |types| list to (resource, size, offset, len).
// This could be changed to some iterator magic to avoid the alloc.
static SmallVector<Type> expandTypes(TypeRange types) {
if (types.empty())
return {};
- auto indexType = IndexType::get(types.front().getContext());
SmallVector<Type> newTypes;
newTypes.reserve(types.size() * 2);
for (auto type : types) {
- newTypes.push_back(type);
- if (isResourceType(type)) {
- newTypes.push_back(indexType); // resource size
- newTypes.push_back(indexType); // subrange offset
- newTypes.push_back(indexType); // subrange length
- }
+ expandType(type, newTypes);
}
return newTypes;
}
@@ -178,6 +181,22 @@
}
}
+static void expandOperand(Location loc, Value operand,
+ SmallVectorImpl<Value> &newOperands,
+ SubrangeMap &subrangeMap, IndexSet &indexSet,
+ OpBuilder &builder) {
+ if (isResourceType(operand.getType())) {
+ auto subrange =
+ consumeSubrange(loc, operand, subrangeMap, indexSet, builder);
+ newOperands.push_back(subrange.resource);
+ newOperands.push_back(subrange.resourceSize);
+ newOperands.push_back(subrange.subrangeOffset);
+ newOperands.push_back(subrange.subrangeLength);
+ } else {
+ newOperands.push_back(operand);
+ }
+}
+
// Expands resources in |operands| into (resource, size, offset, length) tuples.
static SmallVector<Value> expandOperands(Location loc, ValueRange operands,
SubrangeMap &subrangeMap,
@@ -186,29 +205,21 @@
SmallVector<Value> result;
result.reserve(operands.size() * 2);
for (auto operand : operands) {
- if (isResourceType(operand.getType())) {
- auto subrange =
- consumeSubrange(loc, operand, subrangeMap, indexSet, builder);
- result.push_back(subrange.resource);
- result.push_back(subrange.resourceSize);
- result.push_back(subrange.subrangeOffset);
- result.push_back(subrange.subrangeLength);
- } else {
- result.push_back(operand);
- }
+ expandOperand(loc, operand, result, subrangeMap, indexSet, builder);
}
return result;
}
-static void expandSubranges(Operation *op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, SubrangeMap &subrangeMap);
+static void expandSubranges(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ SubrangeMap &subrangeMap);
// Recursively expands resources into (resource, size, offset, length) tuples
// within the given |region|. All branches, ops, and nested regions will be
// processed.
static void expandRegion(Region ®ion, bool canModifyEntryBlock,
- ExpandedGlobalMap &globalMap, IndexSet &indexSet,
- SubrangeMap subrangeMap) {
+ SymbolTable &symbolTable, ExpandedGlobalMap &globalMap,
+ IndexSet &indexSet, SubrangeMap subrangeMap) {
if (region.empty())
return;
@@ -255,14 +266,14 @@
if (region.hasOneBlock()) {
for (auto &op :
llvm::make_early_inc_range(region.front().getOperations())) {
- expandSubranges(&op, globalMap, indexSet, subrangeMap);
+ expandSubranges(&op, symbolTable, globalMap, indexSet, subrangeMap);
}
} else {
DominanceInfo domInfo(region.getParentOp());
for (auto *blockInfo : llvm::breadth_first(domInfo.getRootNode(®ion))) {
auto *block = blockInfo->getBlock();
for (auto &op : llvm::make_early_inc_range(block->getOperations())) {
- expandSubranges(&op, globalMap, indexSet, subrangeMap);
+ expandSubranges(&op, symbolTable, globalMap, indexSet, subrangeMap);
}
}
}
@@ -270,10 +281,12 @@
// Recursively expands all regions on the op.
static void expandRegions(Operation *op, bool canModifyEntryBlock,
+ SymbolTable &symbolTable,
ExpandedGlobalMap &globalMap, IndexSet &indexSet,
SubrangeMap subrangeMap) {
for (auto ®ion : op->getRegions()) {
- expandRegion(region, canModifyEntryBlock, globalMap, indexSet, subrangeMap);
+ expandRegion(region, canModifyEntryBlock, symbolTable, globalMap, indexSet,
+ subrangeMap);
}
}
@@ -385,23 +398,11 @@
}
static void expandInitializerOp(IREE::Util::InitializerOp op,
+ SymbolTable &symbolTable,
ExpandedGlobalMap &globalMap,
IndexSet &indexSet, SubrangeMap &subrangeMap) {
- expandRegion(op.getRegion(), /*canModifyEntryBlock=*/false, globalMap,
- indexSet, subrangeMap);
-}
-
-// Returns true if |op| is either public and visible to external modules or
-// external and resolved later on. We can't modify their signatures.
-static bool isPublicOrExternal(CallableOpInterface callableOp) {
- if (auto symbolOp = dyn_cast<SymbolOpInterface>(callableOp.getOperation())) {
- if (symbolOp.isPublic())
- return true;
- }
- auto *region = callableOp.getCallableRegion();
- if (!region || region->empty())
- return true;
- return false;
+ expandRegion(op.getRegion(), /*canModifyEntryBlock=*/false, symbolTable,
+ globalMap, indexSet, subrangeMap);
}
// Inserts subranges on resource arguments.
@@ -412,25 +413,26 @@
// sites don't need a wait.
//
// Example:
-// func.func @foo(%0: !stream.resource)
+// util.func @foo(%0: !stream.resource)
// ->
-// func.func @foo(%0: !stream.resource, %sz: index, %o: index, %l: index) {
+// util.func @foo(%0: !stream.resource, %sz: index, %o: index, %l: index) {
// %1 = stream.resource.subview %0[%o] : {%sz} -> {%l}
-static void expandFuncOp(mlir::func::FuncOp op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, SubrangeMap &subrangeMap) {
+static void expandFuncOp(IREE::Util::FuncOp op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ SubrangeMap &subrangeMap) {
// Ignore public/external function signatures but still convert regions.
- bool canModifyEntryBlock = !isPublicOrExternal(op);
+ bool canModifyEntryBlock = !IREE::Util::isPublicOrExternal(op);
if (canModifyEntryBlock) {
- auto oldType = op.getFunctionType();
- auto inputTypes = expandTypes(oldType.getInputs());
- auto resultTypes = expandTypes(oldType.getResults());
- auto newType = FunctionType::get(op.getContext(), inputTypes, resultTypes);
- if (newType != oldType) {
- op.setType(newType);
- }
+ op.expandSignature(
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ });
}
- expandRegion(op.getRegion(), canModifyEntryBlock, globalMap, indexSet,
- subrangeMap);
+ expandRegion(op.getRegion(), canModifyEntryBlock, symbolTable, globalMap,
+ indexSet, subrangeMap);
}
// Splits resource operands and results into (resource, resourceSize,
@@ -443,28 +445,31 @@
//
// Example:
// %1 = stream.resource.subview %0[%o] : {%sz} -> {%l}
-// %r = call @foo(%1)
+// %r = util.call @foo(%1)
// ->
-// %r, %rsz, %ro, %rl = call @foo(%0, %sz, %o, %l)
+// %r, %rsz, %ro, %rl = util.call @foo(%0, %sz, %o, %l)
// %2 = stream.resource.subview %r[%ro] : {%rsz} -> {%rl}
-static void expandCallOp(mlir::func::CallOp op, IndexSet &indexSet,
- SubrangeMap &subrangeMap) {
+static void expandCallOp(IREE::Util::CallOp op, SymbolTable &symbolTable,
+ IndexSet &indexSet, SubrangeMap &subrangeMap) {
if (!usesResources(op))
return;
// Ignore calls to public/external functions.
- auto calleeOp = SymbolTable::lookupNearestSymbolFrom<CallableOpInterface>(
- op, op.getCalleeAttr());
- if (isPublicOrExternal(calleeOp))
+ auto calleeOp = symbolTable.lookup<CallableOpInterface>(op.getCallee());
+ if (IREE::Util::isPublicOrExternal(calleeOp))
return;
// Build the new call op with expanded operands and results.
OpBuilder builder(op);
- auto operands = expandOperands(op.getLoc(), op.getOperands(), subrangeMap,
- indexSet, builder);
- auto resultTypes = expandTypes(op.getResultTypes());
- auto newOp = builder.create<mlir::func::CallOp>(op.getLoc(), op.getCallee(),
- resultTypes, operands);
+ auto newOp = op.cloneAndExpand(
+ [&](unsigned i, Value operand, SmallVectorImpl<Value> &newOperands) {
+ expandOperand(op.getLoc(), operand, newOperands, subrangeMap, indexSet,
+ builder);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ builder);
// Insert subranges on results that we are sinking across the call edge.
// The hope is that by moving the subranges here we can fold with uses inside
@@ -499,19 +504,19 @@
//
// Example:
// %1 = stream.resource.subview %0[%o] : {%sz} -> {%l}
-// return %1
+// util.return %1
// ->
-// return %0, %sz, %o, %l
-static void expandReturnOp(mlir::func::ReturnOp op, IndexSet &indexSet,
+// util.return %0, %sz, %o, %l
+static void expandReturnOp(IREE::Util::ReturnOp op, IndexSet &indexSet,
SubrangeMap &subrangeMap) {
if (!usesResources(op))
return;
- if (isPublicOrExternal(op->getParentOfType<mlir::func::FuncOp>()))
+ if (IREE::Util::isPublicOrExternal(op->getParentOfType<IREE::Util::FuncOp>()))
return;
OpBuilder builder(op);
auto operands = expandOperands(op.getLoc(), op.getOperands(), subrangeMap,
indexSet, builder);
- builder.create<mlir::func::ReturnOp>(op.getLoc(), operands);
+ builder.create<IREE::Util::ReturnOp>(op.getLoc(), operands);
op.erase();
}
@@ -573,8 +578,9 @@
// Recursively expands resources into (resource, size, offset, length) in |op|.
// TODO(benvanik): make this a type switch.
-static void expandSubranges(Operation *op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, SubrangeMap &subrangeMap) {
+static void expandSubranges(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ SubrangeMap &subrangeMap) {
if (auto subrangeOp = dyn_cast<IREE::Util::SubrangeOpInterface>(op)) {
return updateSubrangeOp(subrangeOp, indexSet, subrangeMap);
}
@@ -584,12 +590,13 @@
} else if (auto storeOp = dyn_cast<IREE::Util::GlobalStoreOpInterface>(op)) {
return expandGlobalStoreOp(storeOp, globalMap, indexSet, subrangeMap);
} else if (auto initializerOp = dyn_cast<IREE::Util::InitializerOp>(op)) {
- return expandInitializerOp(initializerOp, globalMap, indexSet, subrangeMap);
- } else if (auto funcOp = dyn_cast<mlir::func::FuncOp>(op)) {
- return expandFuncOp(funcOp, globalMap, indexSet, subrangeMap);
- } else if (auto callOp = dyn_cast<mlir::func::CallOp>(op)) {
- return expandCallOp(callOp, indexSet, subrangeMap);
- } else if (auto returnOp = dyn_cast<mlir::func::ReturnOp>(op)) {
+ return expandInitializerOp(initializerOp, symbolTable, globalMap, indexSet,
+ subrangeMap);
+ } else if (auto funcOp = dyn_cast<IREE::Util::FuncOp>(op)) {
+ return expandFuncOp(funcOp, symbolTable, globalMap, indexSet, subrangeMap);
+ } else if (auto callOp = dyn_cast<IREE::Util::CallOp>(op)) {
+ return expandCallOp(callOp, symbolTable, indexSet, subrangeMap);
+ } else if (auto returnOp = dyn_cast<IREE::Util::ReturnOp>(op)) {
return expandReturnOp(returnOp, indexSet, subrangeMap);
} else if (auto branchOp = dyn_cast<mlir::cf::BranchOp>(op)) {
return expandBranchOp(branchOp, indexSet, subrangeMap);
@@ -604,14 +611,14 @@
// We could add an interface to ops we want to do this to, though, to at least
// allow dialects to plug in. For now we just need SCF so this is hardcoded.
if (auto ifOp = dyn_cast<mlir::scf::IfOp>(op)) {
- return expandRegions(ifOp, /*canModifyEntryBlock=*/false, globalMap,
- indexSet, subrangeMap);
+ return expandRegions(ifOp, /*canModifyEntryBlock=*/false, symbolTable,
+ globalMap, indexSet, subrangeMap);
} else if (auto forOp = dyn_cast<mlir::scf::ForOp>(op)) {
- return expandRegions(forOp, /*canModifyEntryBlock=*/false, globalMap,
- indexSet, subrangeMap);
+ return expandRegions(forOp, /*canModifyEntryBlock=*/false, symbolTable,
+ globalMap, indexSet, subrangeMap);
} else if (auto whileOp = dyn_cast<mlir::scf::WhileOp>(op)) {
- return expandRegions(whileOp, /*canModifyEntryBlock=*/false, globalMap,
- indexSet, subrangeMap);
+ return expandRegions(whileOp, /*canModifyEntryBlock=*/false, symbolTable,
+ globalMap, indexSet, subrangeMap);
}
// TODO(benvanik): also handle scf.yield: today we don't propagate across
// return values.
@@ -634,16 +641,16 @@
public:
void getDependentDialects(DialectRegistry ®istry) const override {
registry.insert<mlir::arith::ArithDialect>();
- registry.insert<mlir::func::FuncDialect>();
registry.insert<mlir::scf::SCFDialect>();
registry.insert<IREE::Util::UtilDialect>();
}
void runOnOperation() override {
auto rootOp = getOperation();
+ SymbolTable symbolTable(rootOp);
// Expand all util.global ops holding resources into resource and subrange.
- auto globalMap = expandResourceGlobals(rootOp);
+ auto globalMap = expandResourceGlobals(rootOp, symbolTable);
// Walk the entire IR tree and expand the globals.
// We could do this via pattern application but that gets much trickier to
@@ -658,7 +665,8 @@
IndexSet indexSet(callableOp.getLoc(),
OpBuilder::atBlockBegin(®ion->front()));
SubrangeMap subrangeMap;
- expandSubranges(callableOp, globalMap, indexSet, subrangeMap);
+ expandSubranges(callableOp, symbolTable, globalMap, indexSet,
+ subrangeMap);
}
}
};
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
index 30f3057..1a39957 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/TestConversion.cpp
@@ -12,7 +12,6 @@
#include "iree/compiler/Dialect/Util/Transforms/Passes.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -26,9 +25,9 @@
TestConversionPass() = default;
TestConversionPass(const TestConversionPass &) {}
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<IREE::Util::UtilDialect, func::FuncDialect,
- mlir::arith::ArithDialect, math::MathDialect,
- mlir::affine::AffineDialect, memref::MemRefDialect>();
+ registry.insert<IREE::Util::UtilDialect, mlir::arith::ArithDialect,
+ math::MathDialect, mlir::affine::AffineDialect,
+ memref::MemRefDialect>();
}
void runOnOperation() override {
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/combine_initializers.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/combine_initializers.mlir
index a69a09d..df2ce82 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/combine_initializers.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/combine_initializers.mlir
@@ -2,12 +2,12 @@
// Tests that multiple initializers are combined in their module order.
-func.func private @extern() -> index
+util.func private @extern() -> index
// CHECK: util.global private mutable @global0 : index
util.global private mutable @global0 : index
util.initializer {
- %value0 = func.call @extern() : () -> index
+ %value0 = util.call @extern() : () -> index
util.global.store %value0, @global0 : index
util.return
}
@@ -16,28 +16,28 @@
// CHECK-NEXT: util.global private @global2 : index
util.global private @global2 : index
util.initializer {
- %value1 = func.call @extern() : () -> index
+ %value1 = util.call @extern() : () -> index
util.global.store %value1, @global1 : index
- %value2 = func.call @extern() : () -> index
+ %value2 = util.call @extern() : () -> index
util.global.store %value2, @global2 : index
util.return
}
// CHECK-NEXT: util.initializer {
-// CHECK-NEXT: %[[VALUE0:.+]] = func.call @extern()
+// CHECK-NEXT: %[[VALUE0:.+]] = util.call @extern()
// CHECK-NEXT: util.global.store %[[VALUE0]], @global0
-// CHECK-NEXT: %[[VALUE1:.+]] = func.call @extern()
+// CHECK-NEXT: %[[VALUE1:.+]] = util.call @extern()
// CHECK-NEXT: util.global.store %[[VALUE1]], @global1
-// CHECK-NEXT: %[[VALUE2:.+]] = func.call @extern()
+// CHECK-NEXT: %[[VALUE2:.+]] = util.call @extern()
// CHECK-NEXT: util.global.store %[[VALUE2]], @global2
// CHECK-NEXT: util.return
// CHECK-LABEL: @orderedCombining
-func.func @orderedCombining(%arg0: index) -> (index, index, index) {
+util.func @orderedCombining(%arg0: index) -> (index, index, index) {
util.global.store %arg0, @global0 : index
%value0 = util.global.load @global0 : index
%value1 = util.global.load @global1 : index
%value2 = util.global.load @global2 : index
- return %value0, %value1, %value2 : index, index, index
+ util.return %value0, %value1, %value2 : index, index, index
}
// -----
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f32_to_f16.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f32_to_f16.mlir
index 6285d5e..55eee1e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f32_to_f16.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f32_to_f16.mlir
@@ -3,26 +3,26 @@
// NOTE: for more comprehensive tests see demote_i64_to_i32.mlir.
// CHECK: util.global {{.*}} : tensor<4xf16>
-// CHECK-LABEL: func.func @simple_f32() -> tensor<4xf16>
+// CHECK-LABEL: util.func public @simple_f32() -> tensor<4xf16>
// CHECK-NEXT: %{{.*}} = util.global.address @__global : !util.ptr<tensor<4xf16>>
// CHECK-NEXT: %{{.*}} = util.global.load.indirect %{{.*}} : !util.ptr<tensor<4xf16>> -> tensor<4xf16>
-// CHECK-NEXT: return %{{.*}} : tensor<4xf16>
+// CHECK-NEXT: util.return %{{.*}} : tensor<4xf16>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf32>
-func.func @simple_f32() -> (tensor<4xf32>) {
+util.func public @simple_f32() -> (tensor<4xf32>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf32>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
- return %1 : tensor<4xf32>
+ util.return %1 : tensor<4xf32>
}
// -----
// CHECK: util.global
// CHECK-NOT: f32
-// CHECK-LABEL: func.func @nested_region_f32()
+// CHECK-LABEL: util.func public @nested_region_f32()
// CHECK-NOT: f32
-// CHECK: return %{{.*}} : tensor<?xf16>
+// CHECK: util.return %{{.*}} : tensor<?xf16>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf32>
-func.func @nested_region_f32() -> (tensor<?xf32>) {
+util.func public @nested_region_f32() -> (tensor<?xf32>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf32>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
%c4 = arith.constant 4 : index
@@ -31,5 +31,5 @@
%element = tensor.extract %1[%arg0] : tensor<4xf32>
tensor.yield %element : f32
} : tensor<?xf32>
- return %2 : tensor<?xf32>
+ util.return %2 : tensor<?xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f64_to_f32.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f64_to_f32.mlir
index ceb926b..bd481ce 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f64_to_f32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_f64_to_f32.mlir
@@ -2,46 +2,46 @@
// NOTE: for more comprehensive tests see demote_i64_to_i32.mlir.
-// CHECK-LABEL: func.func @constantF64
+// CHECK-LABEL: util.func public @constantF64
// CHECK-SAME: () -> f32
-func.func @constantF64() -> f64 {
+util.func public @constantF64() -> f64 {
// CHECK-NEXT: constant 123.{{.+}} : f32
%c1234 = arith.constant 123.4 : f64
- return %c1234 : f64
+ util.return %c1234 : f64
}
// -----
-// CHECK-LABEL: func.func @tensorTypesF64
+// CHECK-LABEL: util.func public @tensorTypesF64
// CHECK-SAME: (%arg0: tensor<4x4xf32>) -> tensor<4x4xf32>
-func.func @tensorTypesF64(%arg0 : tensor<4x4xf64>) -> tensor<4x4xf64> {
+util.func public @tensorTypesF64(%arg0 : tensor<4x4xf64>) -> tensor<4x4xf64> {
// CHECK-NEXT: return %arg0 : tensor<4x4xf32>
- return %arg0 : tensor<4x4xf64>
+ util.return %arg0 : tensor<4x4xf64>
}
// -----
// CHECK: util.global {{.*}} : tensor<4xf32>
-// CHECK-LABEL: func.func @simple_f64() -> tensor<4xf32>
+// CHECK-LABEL: util.func public @simple_f64() -> tensor<4xf32>
// CHECK-NEXT: %{{.*}} = util.global.address @__global : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: %{{.*}} = util.global.load.indirect %{{.*}} : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
-// CHECK-NEXT: return %{{.*}} : tensor<4xf32>
+// CHECK-NEXT: util.return %{{.*}} : tensor<4xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf64>
-func.func @simple_f64() -> (tensor<4xf64>) {
+util.func public @simple_f64() -> (tensor<4xf64>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf64>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf64>> -> tensor<4xf64>
- return %1 : tensor<4xf64>
+ util.return %1 : tensor<4xf64>
}
// -----
// CHECK: util.global
// CHECK-NOT: f64
-// CHECK-LABEL: func.func @nested_region_f64()
+// CHECK-LABEL: util.func public @nested_region_f64()
// CHECK-NOT: f64
-// CHECK: return %{{.*}} : tensor<?xf32>
+// CHECK: util.return %{{.*}} : tensor<?xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf64>
-func.func @nested_region_f64() -> (tensor<?xf64>) {
+util.func public @nested_region_f64() -> (tensor<?xf64>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf64>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf64>> -> tensor<4xf64>
%c4 = arith.constant 4 : index
@@ -50,35 +50,35 @@
%element = tensor.extract %1[%arg0] : tensor<4xf64>
tensor.yield %element : f64
} : tensor<?xf64>
- return %2 : tensor<?xf64>
+ util.return %2 : tensor<?xf64>
}
// -----
// Check handling of width-sensitive arith casts.
-// CHECK-LABEL: func.func @arith.truncf(
-// CHECK-SAME: %[[ARG0:.*]]: f32) -> f32 {
-// CHECK: return %[[ARG0]] : f32
-func.func @arith.truncf(%arg0: f64) -> f32 {
+// CHECK-LABEL: util.func public @arith.truncf(
+// CHECK-SAME: %[[ARG0:.*]]: f32) -> f32 {
+// CHECK: util.return %[[ARG0]] : f32
+util.func public @arith.truncf(%arg0: f64) -> f32 {
%0 = arith.truncf %arg0 : f64 to f32
- return %0 : f32
+ util.return %0 : f32
}
-// CHECK-LABEL: func.func @arith.extf(
-// CHECK-SAME: %[[ARG0:.*]]: f32) -> f32 {
-// CHECK: return %[[ARG0]] : f32
-func.func @arith.extf(%arg0: f32) -> f64 {
+// CHECK-LABEL: util.func public @arith.extf(
+// CHECK-SAME: %[[ARG0:.*]]: f32) -> f32 {
+// CHECK: util.return %[[ARG0]] : f32
+util.func public @arith.extf(%arg0: f32) -> f64 {
%0 = arith.extf %arg0 : f32 to f64
- return %0 : f64
+ util.return %0 : f64
}
// -----
-// CHECK-LABEL: func.func @complexTypesF64
+// CHECK-LABEL: util.func public @complexTypesF64
// CHECK-SAME: (%arg0: complex<f32>) -> complex<f32>
-func.func @complexTypesF64(%arg0 : complex<f64>) -> complex<f64> {
- // CHECK-NEXT: return %arg0 : complex<f32>
- return %arg0 : complex<f64>
+util.func public @complexTypesF64(%arg0 : complex<f64>) -> complex<f64> {
+ // CHECK-NEXT: util.return %arg0 : complex<f32>
+ util.return %arg0 : complex<f64>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_i64_to_i32.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_i64_to_i32.mlir
index 1aa3b54..e05669e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_i64_to_i32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/demote_i64_to_i32.mlir
@@ -1,58 +1,58 @@
// RUN: iree-opt --split-input-file --allow-unregistered-dialect --iree-util-demote-i64-to-i32 %s | FileCheck %s
-// CHECK-LABEL: func.func @constant_i64
+// CHECK-LABEL: util.func public @constant_i64
// CHECK-SAME: () -> i32
-func.func @constant_i64() -> i64 {
+util.func public @constant_i64() -> i64 {
// CHECK-NEXT: constant 123 : i32
%c123 = arith.constant 123 : i64
- return %c123 : i64
+ util.return %c123 : i64
}
// -----
-// CHECK-LABEL: func.func @constant_splat_i64
+// CHECK-LABEL: util.func public @constant_splat_i64
// CHECK-SAME: () -> tensor<4xi32>
-func.func @constant_splat_i64() -> tensor<4xi64> {
+util.func public @constant_splat_i64() -> tensor<4xi64> {
// CHECK-NEXT: constant dense<123> : tensor<4xi32>
%c123 = arith.constant dense<123> : tensor<4xi64>
- return %c123 : tensor<4xi64>
+ util.return %c123 : tensor<4xi64>
}
// -----
-// CHECK-LABEL: func.func @constant_dense_i64
+// CHECK-LABEL: util.func public @constant_dense_i64
// CHECK-SAME: () -> tensor<4xi32>
-func.func @constant_dense_i64() -> tensor<4xi64> {
+util.func public @constant_dense_i64() -> tensor<4xi64> {
// CHECK-NEXT: constant dense<[0, 1, 2, 3]> : tensor<4xi32>
%c123 = arith.constant dense<[0, 1, 2, 3]> : tensor<4xi64>
- return %c123 : tensor<4xi64>
+ util.return %c123 : tensor<4xi64>
}
// -----
-// CHECK-LABEL: func.func @args_i64
+// CHECK-LABEL: util.func public @args_i64
// CHECK-SAME: (%arg0: i32) -> i32
-func.func @args_i64(%arg0: i64) -> i64 {
+util.func public @args_i64(%arg0: i64) -> i64 {
// CHECK-NEXT: return %arg0 : i32
- return %arg0 : i64
+ util.return %arg0 : i64
}
// -----
-// CHECK-LABEL: func.func @args_ui64
+// CHECK-LABEL: util.func public @args_ui64
// CHECK-SAME: (%arg0: ui32) -> ui32
-func.func @args_ui64(%arg0: ui64) -> ui64 {
+util.func public @args_ui64(%arg0: ui64) -> ui64 {
// CHECK-NEXT: return %arg0 : ui32
- return %arg0 : ui64
+ util.return %arg0 : ui64
}
// -----
-// CHECK-LABEL: func.func @args_tensor_i64
+// CHECK-LABEL: util.func public @args_tensor_i64
// CHECK-SAME: (%arg0: tensor<4x4xi32>) -> tensor<4x4xi32>
-func.func @args_tensor_i64(%arg0: tensor<4x4xi64>) -> tensor<4x4xi64> {
+util.func public @args_tensor_i64(%arg0: tensor<4x4xi64>) -> tensor<4x4xi64> {
// CHECK-NEXT: return %arg0 : tensor<4x4xi32>
- return %arg0 : tensor<4x4xi64>
+ util.return %arg0 : tensor<4x4xi64>
}
// -----
@@ -60,29 +60,29 @@
// Return types should be converted for all operations, even those that the
// core compiler is not directly aware of.
-// CHECK-LABEL: func.func @custom_constant_i64
+// CHECK-LABEL: util.func public @custom_constant_i64
// CHECK-SAME: () -> tensor<1xi32>
-func.func @custom_constant_i64() -> tensor<1xi64> {
+util.func public @custom_constant_i64() -> tensor<1xi64> {
// CHECK-NEXT: "custom.constant"() : () -> tensor<1xi32>
%c0 = "custom.constant"() : () -> tensor<1xi64>
- return %c0 : tensor<1xi64>
+ util.return %c0 : tensor<1xi64>
}
// -----
-// CHECK-LABEL: func.func @custom_constant_ui64
+// CHECK-LABEL: util.func public @custom_constant_ui64
// CHECK-SAME: () -> tensor<1xui32>
-func.func @custom_constant_ui64() -> tensor<1xui64> {
+util.func public @custom_constant_ui64() -> tensor<1xui64> {
// CHECK-NEXT: "custom.constant"() : () -> tensor<1xui32>
%c0 = "custom.constant"() : () -> tensor<1xui64>
- return %c0 : tensor<1xui64>
+ util.return %c0 : tensor<1xui64>
}
// -----
-// CHECK-LABEL: func.func @arith_cmpi_i64
+// CHECK-LABEL: util.func public @arith_cmpi_i64
// CHECK-SAME: (%arg0: tensor<i32>, %arg1: tensor<i32>) -> (i1, tensor<i32>)
-func.func @arith_cmpi_i64(%arg0 : tensor<i64>, %arg1 : tensor<i64>) -> (i1, tensor<i64>) {
+util.func public @arith_cmpi_i64(%arg0 : tensor<i64>, %arg1 : tensor<i64>) -> (i1, tensor<i64>) {
// CHECK-NEXT: %0 = arith.cmpi slt, %arg0, %arg1 : tensor<i32>
// CHECK-NEXT: %[[EXT:.*]] = tensor.extract %0[] : tensor<i1>
// CHECK-NEXT: cf.cond_br %[[EXT]], ^bb1(%[[EXT]], %arg0 : i1, tensor<i32>), ^bb2(%[[EXT]], %arg1 : i1, tensor<i32>)
@@ -94,28 +94,28 @@
%1 = tensor.extract %0[] : tensor<i1>
cf.cond_br %1, ^bb1(%1, %arg0 : i1, tensor<i64>), ^bb2(%1, %arg1 : i1, tensor<i64>)
^bb1(%2 : i1, %3 : tensor<i64>):
- return %2, %3 : i1, tensor<i64>
+ util.return %2, %3 : i1, tensor<i64>
^bb2(%4 : i1, %5 : tensor<i64>):
- return %4, %5 : i1, tensor<i64>
+ util.return %4, %5 : i1, tensor<i64>
}
// -----
-// CHECK-LABEL: func.func @linalg_matmul_i64
-func.func @linalg_matmul_i64(%arg0: tensor<2x3xi64>, %arg1: tensor<3x4xi64>, %arg2: tensor<2x4xi64>) -> tensor<2x4xi64> {
+// CHECK-LABEL: util.func public @linalg_matmul_i64
+util.func public @linalg_matmul_i64(%arg0: tensor<2x3xi64>, %arg1: tensor<3x4xi64>, %arg2: tensor<2x4xi64>) -> tensor<2x4xi64> {
// CHECK: %[[T:.+]] = linalg.matmul ins(%arg0, %arg1 : tensor<2x3xi32>, tensor<3x4xi32>)
// CHECK-SAME: outs(%arg2 : tensor<2x4xi32>) -> tensor<2x4xi32>
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<2x3xi64>, tensor<3x4xi64>)
outs(%arg2 : tensor<2x4xi64>) -> tensor<2x4xi64>
// CHECK-NEXT: return %[[T:.+]] : tensor<2x4xi32>
- return %0 : tensor<2x4xi64>
+ util.return %0 : tensor<2x4xi64>
}
// -----
-// CHECK-LABEL: func.func @linalg_generic_i64
+// CHECK-LABEL: util.func public @linalg_generic_i64
// CHECK-SAME: (%[[ARG:.+]]: tensor<2xi32>) -> tensor<2xi32>
-func.func @linalg_generic_i64(%arg: tensor<2xi64>) -> tensor<2xi64> {
+util.func public @linalg_generic_i64(%arg: tensor<2xi64>) -> tensor<2xi64> {
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<2xi32>
%init = tensor.empty() : tensor<2xi64>
// CHECK: %[[T:.+]] = linalg.generic {{.+}} ins(%[[ARG]] : tensor<2xi32>) outs(%[[INIT]] : tensor<2xi32>)
@@ -126,18 +126,18 @@
linalg.yield %arg1 : i64
} -> tensor<2xi64>
// CHECK: %[[T]] : tensor<2xi32>
- return %generic : tensor<2xi64>
+ util.return %generic : tensor<2xi64>
}
// -----
-// CHECK-LABEL: func.func @linalg_non_structured_op
+// CHECK-LABEL: util.func public @linalg_non_structured_op
// CHECK-SAME: (%arg0: tensor<9xi32>) -> tensor<1x9xi32>
-func.func @linalg_non_structured_op(%arg0: tensor<9xi64>) -> tensor<1x9xi64> {
+util.func public @linalg_non_structured_op(%arg0: tensor<9xi64>) -> tensor<1x9xi64> {
// CHECK: %[[RES:.+]] = tensor.expand_shape %arg0 {{\[}}[0, 1]] : tensor<9xi32> into tensor<1x9xi32>
- // CHECK: return %[[RES:.+]] : tensor<1x9xi32>
+ // CHECK: util.return %[[RES:.+]] : tensor<1x9xi32>
%0 = tensor.expand_shape %arg0 [[0, 1]] : tensor<9xi64> into tensor<1x9xi64>
- return %0 : tensor<1x9xi64>
+ util.return %0 : tensor<1x9xi64>
}
// -----
@@ -146,11 +146,11 @@
// CHECK: util.global.load @[[VAR]]
// CHECK: util.global.store %{{.+}}, @[[VAR]]
util.global mutable @readwritevar = dense<0> : tensor<i64>
-func.func @foo(%arg0 : tensor<i64>) {
+util.func public @foo(%arg0 : tensor<i64>) {
%0 = util.global.load @readwritevar : tensor<i64>
%1 = arith.addi %0, %arg0 : tensor<i64>
util.global.store %1, @readwritevar : tensor<i64>
- return
+ util.return
}
// -----
@@ -158,38 +158,38 @@
// CHECK: util.global private @{{.+}} : tensor<4xi32>
util.global private @v_initializer : tensor<4xi64>
util.initializer {
- // CHECK: %[[VALUE:.+]] = func.call @initializer() : () -> tensor<4xi32>
- %0 = func.call @initializer() : () -> tensor<4xi64>
+ // CHECK: %[[VALUE:.+]] = util.call @initializer() : () -> tensor<4xi32>
+ %0 = util.call @initializer() : () -> tensor<4xi64>
// CHECK: util.global.store %[[VALUE]], @v_initializer : tensor<4xi32>
util.global.store %0, @v_initializer : tensor<4xi64>
util.return
}
-// CHECK: func.func private @initializer() -> tensor<4xi32>
-func.func private @initializer() -> tensor<4xi64>
+// CHECK: util.func private @initializer() -> tensor<4xi32>
+util.func private @initializer() -> tensor<4xi64>
// -----
// CHECK: util.global {{.*}} : tensor<4xi32>
-// CHECK-LABEL: func.func @simple_i64() -> tensor<4xi32>
+// CHECK-LABEL: util.func public @simple_i64() -> tensor<4xi32>
// CHECK-NEXT: %{{.*}} = util.global.address @__global : !util.ptr<tensor<4xi32>>
// CHECK-NEXT: %{{.*}} = util.global.load.indirect %{{.*}} : !util.ptr<tensor<4xi32>> -> tensor<4xi32>
-// CHECK-NEXT: return %{{.*}} : tensor<4xi32>
+// CHECK-NEXT: util.return %{{.*}} : tensor<4xi32>
util.global private @"__global" = dense<[1, 2, 3, 4]> : tensor<4xi64>
-func.func @simple_i64() -> (tensor<4xi64>) {
+util.func public @simple_i64() -> (tensor<4xi64>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xi64>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xi64>> -> tensor<4xi64>
- return %1 : tensor<4xi64>
+ util.return %1 : tensor<4xi64>
}
// -----
// CHECK: util.global
// CHECK-NOT: i64
-// CHECK-LABEL: func.func @nested_region_i64()
+// CHECK-LABEL: util.func public @nested_region_i64()
// CHECK-NOT: i64
-// CHECK: return %{{.*}} : tensor<?xi32>
+// CHECK: util.return %{{.*}} : tensor<?xi32>
util.global private @"__global" = dense<[1, 2, 3, 4]> : tensor<4xi64>
-func.func @nested_region_i64() -> (tensor<?xi64>) {
+util.func public @nested_region_i64() -> (tensor<?xi64>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xi64>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xi64>> -> tensor<4xi64>
%c4 = arith.constant 4 : index
@@ -198,35 +198,35 @@
%element = tensor.extract %1[%arg0] : tensor<4xi64>
tensor.yield %element : i64
} : tensor<?xi64>
- return %2 : tensor<?xi64>
+ util.return %2 : tensor<?xi64>
}
// -----
// Check handling of width-sensitive arith casts.
-// CHECK-LABEL: func.func @arith.trunci(
-// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
-// CHECK: return %[[ARG0]] : i32
-func.func @arith.trunci(%arg0: i64) -> i32 {
+// CHECK-LABEL: util.func public @arith.trunci(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: util.return %[[ARG0]] : i32
+util.func public @arith.trunci(%arg0: i64) -> i32 {
%0 = arith.trunci %arg0 : i64 to i32
- return %0 : i32
+ util.return %0 : i32
}
-// CHECK-LABEL: func.func @arith.extui(
-// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
-// CHECK: return %[[ARG0]] : i32
-func.func @arith.extui(%arg0: i32) -> i64 {
+// CHECK-LABEL: util.func public @arith.extui(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: util.return %[[ARG0]] : i32
+util.func public @arith.extui(%arg0: i32) -> i64 {
%0 = arith.extui %arg0 : i32 to i64
- return %0 : i64
+ util.return %0 : i64
}
-// CHECK-LABEL: func.func @arith.extsi(
-// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
-// CHECK: return %[[ARG0]] : i32
-func.func @arith.extsi(%arg0: i32) -> i64 {
+// CHECK-LABEL: util.func public @arith.extsi(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: util.return %[[ARG0]] : i32
+util.func public @arith.extsi(%arg0: i32) -> i64 {
%0 = arith.extsi %arg0 : i32 to i64
- return %0 : i64
+ util.return %0 : i64
}
// -----
@@ -236,13 +236,13 @@
// CHECK: ml_program.global
// CHECK-SAME: i32
"ml_program.global"() {sym_name = "_v", sym_visibility = "private", type = tensor<2x2xi64>, value = dense<1> : tensor<2x2xi64>} : () -> ()
-func.func @run() -> tensor<2x2xi64> {
+util.func public @run() -> tensor<2x2xi64> {
%0 = "ml_program.global_load"() {global = @_v} : () -> tensor<2x2xi64>
- %1 = call @f(%0) : (tensor<2x2xi64>) -> tensor<2x2xi64>
- return %1 : tensor<2x2xi64>
+ %1 = util.call @f(%0) : (tensor<2x2xi64>) -> tensor<2x2xi64>
+ util.return %1 : tensor<2x2xi64>
}
-func.func private @f(%arg0: tensor<2x2xi64>) -> tensor<2x2xi64> {
- return %arg0 : tensor<2x2xi64>
+util.func private @f(%arg0: tensor<2x2xi64>) -> tensor<2x2xi64> {
+ util.return %arg0 : tensor<2x2xi64>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
index 319bd85..717d2bf 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
@@ -4,18 +4,18 @@
// If you move or delete it, please update the documentation accordingly.
// CHECK-LABEL: @constant
-func.func @constant() -> i32 {
+util.func @constant() -> i32 {
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : i32
%0 = util.optimization_barrier %c1 : i32
- // CHECK-NEXT: return %[[C1]]
- return %0 : i32
+ // CHECK-NEXT: util.return %[[C1]]
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @multiple
-func.func @multiple() -> (i32, i32) {
+util.func @multiple() -> (i32, i32) {
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : i32
%0 = util.optimization_barrier %c1 : i32
@@ -24,34 +24,34 @@
%c2 = arith.constant 2 : i32
%2 = util.optimization_barrier %1 : i32
%3 = util.optimization_barrier %c2 : i32
- // CHECK-NEXT: return %[[C1]], %[[C2]]
- return %2, %3 : i32, i32
+ // CHECK-NEXT: util.return %[[C1]], %[[C2]]
+ util.return %2, %3 : i32, i32
}
// -----
// CHECK-LABEL: @multiple_operands
-func.func @multiple_operands() -> (i32, i32) {
+util.func @multiple_operands() -> (i32, i32) {
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : i32
// CHECK-NEXT: %[[C2:.+]] = arith.constant 2
%c2 = arith.constant 2 : i32
%0, %1 = util.optimization_barrier %c1, %c2 : i32, i32
- // CHECK-NEXT: return %[[C1]], %[[C2]]
- return %0, %1 : i32, i32
+ // CHECK-NEXT: util.return %[[C1]], %[[C2]]
+ util.return %0, %1 : i32, i32
}
// -----
// CHECK-LABEL: @no_fold_add
-func.func @no_fold_add() -> (i32) {
+util.func @no_fold_add() -> (i32) {
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : i32
%c1 = arith.constant 1 : i32
%0 = util.optimization_barrier %c1 : i32
// CHECK-NEXT: %[[R:.+]] = arith.addi %[[C1]], %[[C1]]
%1 = arith.addi %0, %0 : i32
- // CHECK-NEXT: return %[[R]]
- return %1 : i32
+ // CHECK-NEXT: util.return %[[R]]
+ util.return %1 : i32
}
// -----
@@ -63,12 +63,12 @@
// CHECK-LABEL: @inner
module @inner {
// CHECK-LABEL: @constant
- func.func @constant() -> i32 {
+ util.func @constant() -> i32 {
// CHECK-NEXT: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : i32
%0 = util.optimization_barrier %c1 : i32
- // CHECK-NEXT: return %[[C1]]
- return %0 : i32
+ // CHECK-NEXT: util.return %[[C1]]
+ util.return %0 : i32
}
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fold_globals.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fold_globals.mlir
index 6eaeb1b..3e38eaa 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fold_globals.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fold_globals.mlir
@@ -2,34 +2,34 @@
// CHECK: util.global public mutable @uniformConstants = 5 : index
util.global public mutable @uniformConstants : index
-func.func @foo() {
+util.func @foo() {
%c5 = arith.constant 5 : index
// CHECK-NOT: util.global.store %c5, @uniformConstants : index
util.global.store %c5, @uniformConstants : index
- return
+ util.return
}
-func.func @bar() {
+util.func @bar() {
%c5 = arith.constant 5 : index
// CHECK-NOT: util.global.store %c5, @uniformConstants : index
util.global.store %c5, @uniformConstants : index
- return
+ util.return
}
// -----
// CHECK: util.global public mutable @nonuniformConstants : index
util.global public mutable @nonuniformConstants : index
-func.func @foo() {
+util.func @foo() {
%c5 = arith.constant 5 : index
// CHECK: util.global.store %c5, @nonuniformConstants : index
util.global.store %c5, @nonuniformConstants : index
- return
+ util.return
}
-func.func @bar() {
+util.func @bar() {
%c6 = arith.constant 6 : index
// CHECK: util.global.store %c6, @nonuniformConstants : index
util.global.store %c6, @nonuniformConstants : index
- return
+ util.return
}
// -----
@@ -38,13 +38,13 @@
util.global private mutable @chained0 : index
// CHECK-NOT: util.global private mutable @chained1 : index
util.global private mutable @chained1 : index
-func.func @foo() -> index {
+util.func @foo() -> index {
// CHECK: %[[VALUE:.+]] = util.global.load immutable @chained0 : index
%0 = util.global.load @chained0 : index
// CHECK-NOT: util.global.store
util.global.store %0, @chained1 : index
// CHECK-NEXT: return %[[VALUE]]
- return %0 : index
+ util.return %0 : index
}
// -----
@@ -53,17 +53,17 @@
util.global public mutable @unchained0 : index
// CHECK: util.global public mutable @unchained1 : index
util.global public mutable @unchained1 : index
-func.func @foo() {
+util.func @foo() {
// CHECK: %[[VALUE:.+]] = util.global.load @unchained0 : index
%0 = util.global.load @unchained0 : index
// CHECK: util.global.store %[[VALUE]], @unchained1 : index
util.global.store %0, @unchained1 : index
- return
+ util.return
}
-func.func @bar(%arg0: index) {
+util.func @bar(%arg0: index) {
// CHECK: util.global.store %arg0, @unchained1 : index
util.global.store %arg0, @unchained1 : index
- return
+ util.return
}
// -----
@@ -83,7 +83,7 @@
util.global.store %c6, @immutable1 : index
util.return
}
-func.func @foo(%arg0: index) -> (index, index, index) {
+util.func @foo(%arg0: index) -> (index, index, index) {
// CHECK-DAG: %[[C5:.+]] = arith.constant 5
%0 = util.global.load @immutable0 : index
// CHECK-DAG: %[[C6:.+]] = arith.constant 6
@@ -93,7 +93,7 @@
// CHECK: util.global.store %arg0, @mutable
util.global.store %arg0, @mutable : index
// CHECK: return %[[C5]], %[[C6]], %[[MUTABLE]]
- return %0, %1, %2 : index, index, index
+ util.return %0, %1, %2 : index, index, index
}
// -----
@@ -102,7 +102,7 @@
util.global private mutable @used0 = 5 : index
// CHECK: util.global private mutable @used1 : index
util.global private mutable @used1 : index
-func.func @foo(%arg0: index, %arg1: index) -> (index, index) {
+util.func @foo(%arg0: index, %arg1: index) -> (index, index) {
// CHECK: %[[VALUE0:.+]] = util.global.load @used0 : index
%0 = util.global.load @used0 : index
// CHECK: %[[VALUE1:.+]] = util.global.load @used1 : index
@@ -112,7 +112,7 @@
// CHECK: util.global.store %arg1, @used1 : index
util.global.store %arg1, @used1 : index
// CHECK: return %[[VALUE0]], %[[VALUE1]]
- return %0, %1 : index, index
+ util.return %0, %1 : index, index
}
// -----
@@ -134,13 +134,13 @@
util.global private @dupeCst0 {inlining_policy = #util.inline.never} = 5 : index
// CHECK-NOT: util.global private @dupeCst1
util.global private @dupeCst1 {inlining_policy = #util.inline.never} = 5 : index
-func.func @foo() -> (index, index) {
+util.func @foo() -> (index, index) {
// CHECK-DAG: %[[VALUE0:.+]] = util.global.load immutable @dupeCst0
%0 = util.global.load @dupeCst0 : index
// CHECK-DAG: %[[VALUE1:.+]] = util.global.load immutable @dupeCst0
%1 = util.global.load @dupeCst1 : index
// CHECK: return %[[VALUE0]], %[[VALUE1]]
- return %0, %1 : index, index
+ util.return %0, %1 : index, index
}
// -----
@@ -155,11 +155,11 @@
util.global.store %c7, @nondupeCst1 : index
util.return
}
-func.func @foo() -> (index, index) {
+util.func @foo() -> (index, index) {
// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index
%0 = util.global.load @nondupeCst0 : index
// CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
%1 = util.global.load @nondupeCst1 : index
// CHECK: return %[[C6]], %[[C7]]
- return %0, %1 : index, index
+ util.return %0, %1 : index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fuse_globals.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fuse_globals.mlir
index e77f49d..e7d0b45 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fuse_globals.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/fuse_globals.mlir
@@ -3,7 +3,7 @@
// CHECK: util.global private mutable @fusable0 : index
util.global private mutable @fusable0 : index
util.global private mutable @fusable1 : index
-func.func @foo(%arg0: index) -> (index, index) {
+util.func @foo(%arg0: index) -> (index, index) {
// CHECK: util.global.store %arg0, @fusable0
util.global.store %arg0, @fusable0 : index
// CHECK-NOT: util.global.store %arg0, @fusable1
@@ -12,8 +12,8 @@
%0 = util.global.load @fusable0 : index
// CHECK: %[[VALUE1:.+]] = util.global.load @fusable0 : index
%1 = util.global.load @fusable1 : index
- // CHECK: return %[[VALUE0]], %[[VALUE1]]
- return %0, %1 : index, index
+ // CHECK: util.return %[[VALUE0]], %[[VALUE1]]
+ util.return %0, %1 : index, index
}
// -----
@@ -24,7 +24,7 @@
util.global private mutable @unfusable0 : index
// CHECK: util.global private mutable @unfusable1 : index
util.global private mutable @unfusable1 : index
-func.func @nonuniform_a(%arg0: index) -> (index, index) {
+util.func @nonuniform_a(%arg0: index) -> (index, index) {
// CHECK: util.global.store %arg0, @unfusable0 : index
util.global.store %arg0, @unfusable0 : index
// CHECK: util.global.store %arg0, @unfusable1 : index
@@ -33,12 +33,12 @@
%0 = util.global.load @unfusable0 : index
// CHECK: %[[VALUE1:.+]] = util.global.load @unfusable1 : index
%1 = util.global.load @unfusable1 : index
- // CHECK: return %[[VALUE0]], %[[VALUE1]]
- return %0, %1 : index, index
+ // CHECK: util.return %[[VALUE0]], %[[VALUE1]]
+ util.return %0, %1 : index, index
}
-func.func @nonuniform_b(%arg0: index) {
+util.func @nonuniform_b(%arg0: index) {
util.global.store %arg0, @unfusable0 : index
- return
+ util.return
}
util.initializer {
%0 = "some.op"() : () -> index
@@ -54,7 +54,7 @@
util.global private mutable @unfusableInit0 = 5 : index
// CHECK: util.global private mutable @unfusableInit1 = 6 : index
util.global private mutable @unfusableInit1 = 6 : index
-func.func @initializer_mix(%arg0: index) -> (index, index) {
+util.func @initializer_mix(%arg0: index) -> (index, index) {
// CHECK: util.global.store %arg0, @unfusableInit0
util.global.store %arg0, @unfusableInit0 : index
// CHECK: util.global.store %arg0, @unfusableInit1
@@ -63,8 +63,8 @@
%0 = util.global.load @unfusableInit0 : index
// CHECK: %[[VALUE1:.+]] = util.global.load @unfusableInit1 : index
%1 = util.global.load @unfusableInit1 : index
- // CHECK: return %[[VALUE0]], %[[VALUE1]]
- return %0, %1 : index, index
+ // CHECK: util.return %[[VALUE0]], %[[VALUE1]]
+ util.return %0, %1 : index, index
}
// -----
@@ -73,14 +73,14 @@
util.global private mutable @unfusableDivergent0 : index
// CHECK: util.global private mutable @unfusableDivergent1
util.global private mutable @unfusableDivergent1 : index
-func.func @fn_a(%arg0: index) {
+util.func @fn_a(%arg0: index) {
util.global.store %arg0, @unfusableDivergent0 : index
util.global.store %arg0, @unfusableDivergent1 : index
- return
+ util.return
}
-func.func @fn_b(%arg0: index) {
+util.func @fn_b(%arg0: index) {
util.global.store %arg0, @unfusableDivergent0 : index
- return
+ util.return
}
// -----
@@ -103,9 +103,9 @@
util.global.store %v, @unfusableSubset2 : index
util.return
}
-// CHECK: func.func @mutate_unfusable(%[[ARG0:.+]]: index)
-func.func @mutate_unfusable(%arg0: index) {
+// CHECK: util.func public @mutate_unfusable(%[[ARG0:.+]]: index)
+util.func public @mutate_unfusable(%arg0: index) {
// CHECK: util.global.store %[[ARG0]], @unfusableSubset2
util.global.store %arg0, @unfusableSubset2 : index
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
index 2eacd77..d7af756 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/hoist_into_globals.mlir
@@ -3,16 +3,16 @@
// CHECK-LABEL: @hoist_simple_const_expr
module @hoist_simple_const_expr {
// CHECK: util.global private @[[HOISTED_SYM:.*]] : i32
- // CHECK: func.func @main
- func.func @main() -> (i32) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (i32) {
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
// CHECK-NOT: arith.constant
// CHECK-NOT: iree_unregistered.const_expr
// CHECK: %[[VAL:.*]] = util.global.load @[[HOISTED_SYM]] : i32
- // CHECK: return %[[VAL]]
+ // CHECK: util.return %[[VAL]]
%2 = "iree_unregistered.const_expr"(%0, %1) : (i32, i32) -> i32
- return %2 : i32
+ util.return %2 : i32
}
// CHECK: util.initializer attributes {iree.compiler.consteval} {
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -28,16 +28,16 @@
// checks.
// CHECK-LABEL: @do_not_hoist_variable_op
// CHECK-NOT: util.global
-// CHECK: func.func @main
+// CHECK: util.func public @main
// CHECK: %[[VAL:.*]] = "iree_unregistered.var_expr"
-// CHECK: return %[[VAL]]
+// CHECK: util.return %[[VAL]]
// CHECK-NOT: util.initializer
module @do_not_hoist_variable_op {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.var_expr"(%0, %1) : (i32, i32) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -46,10 +46,10 @@
// CHECK-NOT: util.global
// CHECK-NOT: util.initializer
module @do_not_hoist_variable_operands {
- func.func @main(%arg0 : i32) -> (i32) {
+ util.func public @main(%arg0 : i32) -> (i32) {
%0 = arith.constant 0 : i32
%2 = "iree_unregistered.const_expr"(%0, %arg0) : (i32, i32) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -58,10 +58,10 @@
// CHECK-NOT: util.global
// CHECK-NOT: util.initializer
module @do_not_hoist_sub_byte_aligned_scalar_leaf {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant 1 : i1
%2 = "iree_unregistered.var_expr"(%0) : (i1) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -70,10 +70,10 @@
// CHECK-NOT: util.global
// CHECK-NOT: util.initializer
module @do_not_hoist_sub_byte_aligned_tensor_leaf {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant dense<true> : tensor<i1>
%2 = "iree_unregistered.var_expr"(%0) : (tensor<i1>) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -83,10 +83,10 @@
// Can hoist a const-expr tree that transitively includes sub-byte aligned
// values.
module @hoist_sub_byte_aligned_scalar_transitive {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant 1 : i1
%2 = "iree_unregistered.const_expr"(%0) : (i1) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -96,10 +96,10 @@
// We presently expand i1 -> i8 for legacy reasons. As such, we support
// it, even though we don't generally support sub-byte constexprs.
module @hoist_i1_tensor_transitive {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant dense<true> : tensor<i1>
%2 = "iree_unregistered.const_expr"(%0) : (tensor<i1>) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
@@ -112,19 +112,19 @@
// CHECK: util.global private @latent_global : i32
util.global private @latent_global : i32
- // CHECK: func.func @main
- func.func @main() -> (i32, i32, i32) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (i32, i32, i32) {
// CHECK-DAG: %[[LOAD_HOISTED_0:.*]] = util.global.load @[[HOISTED_0]] : i32
// CHECK-DAG: %[[LOAD_HOISTED_1:.*]] = util.global.load @[[HOISTED_1]] : i32
// CHECK-DAG: %[[RESULT:.*]] = "iree_unregistered.var_expr"(%[[LOAD_HOISTED_1]])
- // CHECK: return %[[LOAD_HOISTED_0]], %[[LOAD_HOISTED_1]], %[[RESULT]]
+ // CHECK: util.return %[[LOAD_HOISTED_0]], %[[LOAD_HOISTED_1]], %[[RESULT]]
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.const_expr"(%0, %1) : (i32, i32) -> i32
%3 = util.global.load @latent_global : i32
%4 = "iree_unregistered.const_expr"(%2, %3) : (i32, i32) -> i32
%5 = "iree_unregistered.var_expr"(%4) : (i32) -> i32
- return %2, %4, %5 : i32, i32, i32
+ util.return %2, %4, %5 : i32, i32, i32
}
// CHECK: util.initializer attributes {iree.compiler.consteval} {
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -147,16 +147,16 @@
// CHECK-LABEL: @hoist_const_expr_with_ineligible_consumer
module @hoist_const_expr_with_ineligible_consumer {
// CHECK: util.global private @[[HOISTED_0:.*]] : i32
- // CHECK: func.func @main
- func.func @main() -> i32 {
+ // CHECK: util.func public @main
+ util.func public @main() -> i32 {
// CHECK-DAG: %[[LOAD_HOISTED_0:.*]] = util.global.load @[[HOISTED_0]] : i32
// CHECK-DAG: %[[RESULT:.*]] = "iree_unregistered.var_expr"(%[[LOAD_HOISTED_0]])
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.const_expr"(%0, %1) : (i32, i32) -> i32
%3 = "iree_unregistered.var_expr"(%2) : (i32) -> i32
- return %3 : i32
+ util.return %3 : i32
}
// CHECK: util.initializer attributes {iree.compiler.consteval} {
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -174,17 +174,17 @@
// CHECK-LABEL: @hoist_non_leaf_const_expr
module @hoist_non_leaf_const_expr {
// CHECK: util.global private @[[HOISTED:.*]] : i32
- // CHECK: func.func @main
- func.func @main() -> (i32) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (i32) {
// CHECK: %[[LOAD_HOISTED:.*]] = util.global.load @[[HOISTED]] : i32
// CHECK: %[[RESULT:.*]] = "iree_unregistered.non_leaf_const_expr"(%hoisted)
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.non_leaf_const_expr"(%0, %1) : (i32, i32) -> i32
%3 = "iree_unregistered.const_expr"(%2) : (i32) -> i32
%4 = "iree_unregistered.non_leaf_const_expr"(%3) : (i32) -> i32
- return %4 : i32
+ util.return %4 : i32
}
// CHECK: util.initializer attributes {iree.compiler.consteval} {
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
@@ -200,20 +200,20 @@
// CHECK-LABEL: @hoist_implicit_capture
module @hoist_implicit_capture {
// CHECK: util.global private @[[HOISTED_SYM:.*]] : i32
- // CHECK: func.func @main
- func.func @main() -> (i32) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (i32) {
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
// CHECK-NOT: arith.constant
// CHECK-NOT: iree_unregistered.const_expr
// CHECK: %[[VAL:.*]] = util.global.load @[[HOISTED_SYM]] : i32
- // CHECK: return %[[VAL]]
+ // CHECK: util.return %[[VAL]]
%2 = "iree_unregistered.const_expr"(%0) ({
^bb0(%inner0 : i32):
%3 = arith.addi %inner0, %1 : i32
"iree_unregistered.yield"(%3) : (i32) -> i32
}) : (i32) -> i32
- return %2 : i32
+ util.return %2 : i32
}
// Key checks: arith.constant 1 gets pulled in to the initializer
// and the reference is updated correctly in the custom op region.
@@ -233,24 +233,24 @@
// CHECK-NOT: util.global
// CHECK-NOT: util.initializer
module @do_not_hoist_non_value_type_results {
- func.func @main() -> (!iree_unregistered.unknown_type) {
+ util.func public @main() -> (!iree_unregistered.unknown_type) {
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.const_expr"(%0, %1) : (i32, i32) -> !iree_unregistered.unknown_type
- return %2 : !iree_unregistered.unknown_type
+ util.return %2 : !iree_unregistered.unknown_type
}
}
// -----
module @do_not_hoist_uses_within_dispatches {
- func.func @main() -> (tensor<i32>) {
+ util.func public @main() -> (tensor<i32>) {
%cst = arith.constant dense<[2, 3]>: tensor<2xi32>
%result = flow.dispatch.region -> (tensor<i32>) {
%slice = tensor.extract_slice %cst[0] [1] [1] : tensor<2xi32> to tensor<i32>
flow.return %slice : tensor<i32>
}
- return %result : tensor<i32>
+ util.return %result : tensor<i32>
}
}
// CHECK-LABEL: @do_not_hoist_uses_within_dispatches
@@ -258,12 +258,12 @@
// CHECK: %[[RESULT:.+]] = flow.dispatch.region
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[CST]]
// CHECK: flow.return %[[SLICE]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
#map = affine_map<(d0, d1) -> (d0, d1)>
module @do_not_hoist_uses_within_dispatches {
- func.func @main() -> tensor<2x2xi32> {
+ util.func public @main() -> tensor<2x2xi32> {
%0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
%1 = arith.constant dense<[[6, 7], [8,9]]> : tensor<2x2xi32>
%expanded = tensor.expand_shape %0[[0, 1]] : tensor<4xi32> into tensor<2x2xi32>
@@ -276,7 +276,7 @@
} -> tensor<2x2xi32>
flow.return %4 : tensor<2x2xi32>
}
- return %3 : tensor<2x2xi32>
+ util.return %3 : tensor<2x2xi32>
}
}
// CHECK-LABEL: @do_not_hoist_uses_within_dispatches
@@ -286,7 +286,7 @@
// CHECK: %[[ADD:.+]] = linalg.generic
// CHECK-SAME: %[[EXPANDED]]
// CHECK: flow.return %[[ADD]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
@@ -299,12 +299,12 @@
// CHECK: util.global
// CHECK: util.initializer
module @hoist_no_significant_size_increase_const_expr {
- func.func @main() -> (tensor<128xi8>) {
+ util.func public @main() -> (tensor<128xi8>) {
%0 = arith.constant dense<0> : tensor<32xi8>
%1 = arith.constant dense<0> : tensor<32xi8>
%2 = "iree_unregistered.const_expr"(%0, %1)
- : (tensor<32xi8>, tensor<32xi8>) -> tensor<128xi8>
- return %2 : tensor<128xi8>
+ : (tensor<32xi8>, tensor<32xi8>) -> tensor<128xi8>
+ util.return %2 : tensor<128xi8>
}
}
@@ -316,12 +316,12 @@
// CHECK-NOT: util.global
// CHECK-NOT: util.initializer
module @do_not_hoist_significant_size_increase_const_expr {
- func.func @main() -> (tensor<129xi8>) {
+ util.func public @main() -> (tensor<129xi8>) {
%0 = arith.constant dense<0> : tensor<32xi8>
%1 = arith.constant dense<0> : tensor<32xi8>
%2 = "iree_unregistered.const_expr"(%0, %1)
- : (tensor<32xi8>, tensor<32xi8>) -> tensor<129xi8>
- return %2 : tensor<129xi8>
+ : (tensor<32xi8>, tensor<32xi8>) -> tensor<129xi8>
+ util.return %2 : tensor<129xi8>
}
}
@@ -335,11 +335,11 @@
// CHECK-NOT: util.initializer
module @nested_program_const_expr {
module {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant 0 : i32
%1 = arith.constant 1 : i32
%2 = "iree_unregistered.const_expr"(%0, %1) : (i32, i32) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
}
@@ -353,9 +353,9 @@
// CHECK: util.initializer {
// CHECK-NEXT: util.global.load @parameter_constant
util.global private @parameter_constant = #stream.parameter.named<"compile"::"constant_hoisted_0"> : i32
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%load = util.global.load @parameter_constant : i32
%1 = "iree_unregistered.const_expr"(%load) : (i32) -> i32
- return %1 : i32
+ util.return %1 : i32
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/import_resources.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/import_resources.mlir
index e8b3b50..06360c7 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/import_resources.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/import_resources.mlir
@@ -1,89 +1,89 @@
// RUN: iree-opt --split-input-file --iree-util-import-resources %s | FileCheck %s
-// CHECK-LABEL: func.func @constant_splat_i64
-func.func @constant_splat_i64() -> tensor<4xi64> {
+// CHECK-LABEL: util.func public @constant_splat_i64
+util.func public @constant_splat_i64() -> tensor<4xi64> {
// Splats should not convert.
// CHECK-NEXT: constant dense<123>
%c123 = arith.constant dense<123> : tensor<4xi64>
- return %c123 : tensor<4xi64>
+ util.return %c123 : tensor<4xi64>
}
// -----
-// CHECK-LABEL: func.func @dense_i1
-func.func @dense_i1() -> tensor<4xi1> {
+// CHECK-LABEL: util.func public @dense_i1
+util.func public @dense_i1() -> tensor<4xi1> {
// CHECK: dense_resource<dense_elements_i1>
%c123 = arith.constant dense<[true, false, false, true]> : tensor<4xi1>
- return %c123 : tensor<4xi1>
+ util.return %c123 : tensor<4xi1>
}
// CHECK: dense_elements_i1: "0x4000000001000001"
// -----
-// CHECK-LABEL: func.func @dense_i8
-func.func @dense_i8() -> tensor<4xi8> {
+// CHECK-LABEL: util.func public @dense_i8
+util.func public @dense_i8() -> tensor<4xi8> {
// CHECK: dense_resource<dense_elements_i8>
%c123 = arith.constant dense<[1, 2, 3, 127]> : tensor<4xi8>
- return %c123 : tensor<4xi8>
+ util.return %c123 : tensor<4xi8>
}
// CHECK: dense_elements_i8: "0x400000000102037F"
// -----
-// CHECK-LABEL: func.func @dense_i16
-func.func @dense_i16() -> tensor<4xi16> {
+// CHECK-LABEL: util.func public @dense_i16
+util.func public @dense_i16() -> tensor<4xi16> {
// CHECK: dense_resource<dense_elements_i16>
%c123 = arith.constant dense<[1, 2, 3, 127]> : tensor<4xi16>
- return %c123 : tensor<4xi16>
+ util.return %c123 : tensor<4xi16>
}
// CHECK: dense_elements_i16: "0x400000000100020003007F00"
// -----
-// CHECK-LABEL: func.func @dense_i32
-func.func @dense_i32() -> tensor<4xi32> {
+// CHECK-LABEL: util.func public @dense_i32
+util.func public @dense_i32() -> tensor<4xi32> {
// CHECK: dense_resource<dense_elements_i32>
%c123 = arith.constant dense<[1, 2, 3, 127]> : tensor<4xi32>
- return %c123 : tensor<4xi32>
+ util.return %c123 : tensor<4xi32>
}
// CHECK: dense_elements_i32: "0x400000000100000002000000030000007F000000"
// -----
-// CHECK-LABEL: func.func @dense_i64
-func.func @dense_i64() -> tensor<4xi64> {
+// CHECK-LABEL: util.func public @dense_i64
+util.func public @dense_i64() -> tensor<4xi64> {
// CHECK: dense_resource<dense_elements_i64>
%c123 = arith.constant dense<[1, 2, 3, 127]> : tensor<4xi64>
- return %c123 : tensor<4xi64>
+ util.return %c123 : tensor<4xi64>
}
// CHECK: dense_elements_i64: "0x400000000100000000000000020000000000000003000000000000007F00000000000000"
// -----
-// CHECK-LABEL: func.func @dense_f16
-func.func @dense_f16() -> tensor<4xf16> {
+// CHECK-LABEL: util.func public @dense_f16
+util.func public @dense_f16() -> tensor<4xf16> {
// CHECK: dense_resource<dense_elements_f16>
%c123 = arith.constant dense<[1.1, 2.2, 3.3, 0.0]> : tensor<4xf16>
- return %c123 : tensor<4xf16>
+ util.return %c123 : tensor<4xf16>
}
// CHECK: dense_elements_f16: "0x40000000663C66409A420000"
// -----
-// CHECK-LABEL: func.func @dense_f32
-func.func @dense_f32() -> tensor<4xf32> {
+// CHECK-LABEL: util.func public @dense_f32
+util.func public @dense_f32() -> tensor<4xf32> {
// CHECK: dense_resource<dense_elements_f32>
%c123 = arith.constant dense<[1.1, 2.2, 3.3, 0.0]> : tensor<4xf32>
- return %c123 : tensor<4xf32>
+ util.return %c123 : tensor<4xf32>
}
// CHECK: dense_elements_f32: "0x40000000CDCC8C3FCDCC0C403333534000000000"
// -----
-// CHECK-LABEL: func.func @dense_f64
-func.func @dense_f64() -> tensor<4xf64> {
+// CHECK-LABEL: util.func public @dense_f64
+util.func public @dense_f64() -> tensor<4xf64> {
// CHECK: dense_resource<dense_elements_f64>
%c123 = arith.constant dense<[1.1, 2.2, 3.3, 0.0]> : tensor<4xf64>
- return %c123 : tensor<4xf64>
+ util.return %c123 : tensor<4xf64>
}
// CHECK: dense_elements_f64: "0x400000009A9999999999F13F9A999999999901406666666666660A400000000000000000"
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/ipo.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/ipo.mlir
index 631dc40..27ead2b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/ipo.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/ipo.mlir
@@ -2,99 +2,99 @@
// Tests that unused args get dropped.
-// CHECK-LABEL: func.func private @unused_arg_callee
+// CHECK-LABEL: util.func private @unused_arg_callee
// CHECK-SAME: (%[[ARG1:.+]]: index) -> index
-func.func private @unused_arg_callee(%arg0: index, %arg1: index) -> index {
+util.func private @unused_arg_callee(%arg0: index, %arg1: index) -> index {
// CHECK: %[[ADD:.+]] = arith.addi %[[ARG1]], %[[ARG1]]
%add = arith.addi %arg1, %arg1 : index
- // CHECK: return %[[ADD]]
- return %add : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
}
-// CHECK: func.func @unused_arg_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
-func.func @unused_arg_caller_a(%arg0: index, %arg1: index) -> (index, index) {
- // CHECK: %[[A_RET0:.+]] = call @unused_arg_callee(%[[A_ARG0]]) : (index) -> index
- %ret0 = call @unused_arg_callee(%arg0, %arg0) : (index, index) -> index
- // CHECK: %[[A_RET1:.+]] = call @unused_arg_callee(%[[A_ARG1]]) : (index) -> index
- %ret1 = call @unused_arg_callee(%arg0, %arg1) : (index, index) -> index
- // CHECK: return %[[A_RET0]], %[[A_RET1]]
- return %ret0, %ret1 : index, index
+// CHECK: util.func public @unused_arg_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
+util.func public @unused_arg_caller_a(%arg0: index, %arg1: index) -> (index, index) {
+ // CHECK: %[[A_RET0:.+]] = util.call @unused_arg_callee(%[[A_ARG0]]) : (index) -> index
+ %ret0 = util.call @unused_arg_callee(%arg0, %arg0) : (index, index) -> index
+ // CHECK: %[[A_RET1:.+]] = util.call @unused_arg_callee(%[[A_ARG1]]) : (index) -> index
+ %ret1 = util.call @unused_arg_callee(%arg0, %arg1) : (index, index) -> index
+ // CHECK: util.return %[[A_RET0]], %[[A_RET1]]
+ util.return %ret0, %ret1 : index, index
}
-// CHECK: func.func @unused_arg_caller_b(%[[B_ARG0:.+]]: index)
-func.func @unused_arg_caller_b(%arg0: index) -> index {
- // CHECK: %[[B_RET0:.+]] = call @unused_arg_callee(%[[B_ARG0]]) : (index) -> index
- %ret0 = call @unused_arg_callee(%arg0, %arg0) : (index, index) -> index
- // CHECK: return %[[B_RET0]]
- return %ret0 : index
+// CHECK: util.func public @unused_arg_caller_b(%[[B_ARG0:.+]]: index)
+util.func public @unused_arg_caller_b(%arg0: index) -> index {
+ // CHECK: %[[B_RET0:.+]] = util.call @unused_arg_callee(%[[B_ARG0]]) : (index) -> index
+ %ret0 = util.call @unused_arg_callee(%arg0, %arg0) : (index, index) -> index
+ // CHECK: util.return %[[B_RET0]]
+ util.return %ret0 : index
}
// -----
// Tests that uniformly unused results get dropped.
-// CHECK-LABEL: func.func private @unused_result_callee
+// CHECK-LABEL: util.func private @unused_result_callee
// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: index) -> index
-func.func private @unused_result_callee(%arg0: index, %arg1: index) -> (index, index) {
+util.func private @unused_result_callee(%arg0: index, %arg1: index) -> (index, index) {
// CHECK: %[[ADD0:.+]] = arith.addi %[[ARG0]], %[[ARG1]]
%add0 = arith.addi %arg0, %arg1 : index
// CHECK: %[[ADD1:.+]] = arith.addi %[[ADD0]], %[[ARG0]]
%add1 = arith.addi %add0, %arg0 : index
- // CHECK: return %[[ADD1]]
- return %add0, %add1 : index, index
+ // CHECK: util.return %[[ADD1]]
+ util.return %add0, %add1 : index, index
}
-// CHECK: func.func @unused_result_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
-func.func @unused_result_caller_a(%arg0: index, %arg1: index) -> index {
- // CHECK: %[[A_RET1:.+]] = call @unused_result_callee(%[[A_ARG0]], %[[A_ARG1]]) : (index, index) -> index
- %ret:2 = call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
- // CHECK: return %[[A_RET1]]
- return %ret#1 : index
+// CHECK: util.func public @unused_result_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
+util.func public @unused_result_caller_a(%arg0: index, %arg1: index) -> index {
+ // CHECK: %[[A_RET1:.+]] = util.call @unused_result_callee(%[[A_ARG0]], %[[A_ARG1]]) : (index, index) -> index
+ %ret:2 = util.call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
+ // CHECK: util.return %[[A_RET1]]
+ util.return %ret#1 : index
}
-// CHECK: func.func @unused_result_caller_b(%[[B_ARG0:.+]]: index, %[[B_ARG1:.+]]: index)
-func.func @unused_result_caller_b(%arg0: index, %arg1: index) -> index {
- // CHECK: %[[B_RET1:.+]] = call @unused_result_callee(%[[B_ARG0]], %[[B_ARG1]]) : (index, index) -> index
- %ret:2 = call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
- // CHECK: return %[[B_RET1]]
- return %ret#1 : index
+// CHECK: util.func public @unused_result_caller_b(%[[B_ARG0:.+]]: index, %[[B_ARG1:.+]]: index)
+util.func public @unused_result_caller_b(%arg0: index, %arg1: index) -> index {
+ // CHECK: %[[B_RET1:.+]] = util.call @unused_result_callee(%[[B_ARG0]], %[[B_ARG1]]) : (index, index) -> index
+ %ret:2 = util.call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
+ // CHECK: util.return %[[B_RET1]]
+ util.return %ret#1 : index
}
-// CHECK: func.func @unused_result_caller_c(%[[C_ARG0:.+]]: index, %[[C_ARG1:.+]]: index)
-func.func @unused_result_caller_c(%arg0: index, %arg1: index) {
- // CHECK: %[[C_RET1:.+]] = call @unused_result_callee(%[[C_ARG0]], %[[C_ARG1]]) : (index, index) -> index
- %ret:2 = call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
- // CHECK: return
- return
+// CHECK: util.func public @unused_result_caller_c(%[[C_ARG0:.+]]: index, %[[C_ARG1:.+]]: index)
+util.func public @unused_result_caller_c(%arg0: index, %arg1: index) {
+ // CHECK: %[[C_RET1:.+]] = util.call @unused_result_callee(%[[C_ARG0]], %[[C_ARG1]]) : (index, index) -> index
+ %ret:2 = util.call @unused_result_callee(%arg0, %arg1) : (index, index) -> (index, index)
+ // CHECK: util.return
+ util.return
}
// -----
// Tests that uniformly duplicate args get combined.
-// CHECK-LABEL: func.func private @dupe_arg_callee
+// CHECK-LABEL: util.func private @dupe_arg_callee
// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: index) -> index
-func.func private @dupe_arg_callee(%arg0: index, %arg1: index, %arg0_dupe: index) -> index {
+util.func private @dupe_arg_callee(%arg0: index, %arg1: index, %arg0_dupe: index) -> index {
// CHECK: %[[ADD0:.+]] = arith.addi %[[ARG0]], %[[ARG1]]
%add0 = arith.addi %arg0, %arg1 : index
// CHECK: %[[ADD1:.+]] = arith.addi %[[ADD0]], %[[ARG0]]
%add1 = arith.addi %add0, %arg0_dupe : index
- // CHECK: return %[[ADD1]]
- return %add1 : index
+ // CHECK: util.return %[[ADD1]]
+ util.return %add1 : index
}
-// CHECK: func.func @dupe_arg_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
-func.func @dupe_arg_caller_a(%arg0: index, %arg1: index) -> (index, index) {
- // CHECK: %[[A_RET0:.+]] = call @dupe_arg_callee(%[[A_ARG0]], %[[A_ARG0]]) : (index, index) -> index
- %ret0 = call @dupe_arg_callee(%arg0, %arg0, %arg0) : (index, index, index) -> index
- // CHECK: %[[A_RET1:.+]] = call @dupe_arg_callee(%[[A_ARG0]], %[[A_ARG1]]) : (index, index) -> index
- %ret1 = call @dupe_arg_callee(%arg0, %arg1, %arg0) : (index, index, index) -> index
- // CHECK: return %[[A_RET0]], %[[A_RET1]]
- return %ret0, %ret1 : index, index
+// CHECK: util.func public @dupe_arg_caller_a(%[[A_ARG0:.+]]: index, %[[A_ARG1:.+]]: index)
+util.func public @dupe_arg_caller_a(%arg0: index, %arg1: index) -> (index, index) {
+ // CHECK: %[[A_RET0:.+]] = util.call @dupe_arg_callee(%[[A_ARG0]], %[[A_ARG0]]) : (index, index) -> index
+ %ret0 = util.call @dupe_arg_callee(%arg0, %arg0, %arg0) : (index, index, index) -> index
+ // CHECK: %[[A_RET1:.+]] = util.call @dupe_arg_callee(%[[A_ARG0]], %[[A_ARG1]]) : (index, index) -> index
+ %ret1 = util.call @dupe_arg_callee(%arg0, %arg1, %arg0) : (index, index, index) -> index
+ // CHECK: util.return %[[A_RET0]], %[[A_RET1]]
+ util.return %ret0, %ret1 : index, index
}
-// CHECK: func.func @dupe_arg_caller_b(%[[B_ARG0:.+]]: index)
-func.func @dupe_arg_caller_b(%arg0: index) -> index {
- // CHECK: %[[B_RET0:.+]] = call @dupe_arg_callee(%[[B_ARG0]], %[[B_ARG0]]) : (index, index) -> index
- %ret0 = call @dupe_arg_callee(%arg0, %arg0, %arg0) : (index, index, index) -> index
- // CHECK: return %[[B_RET0]]
- return %ret0 : index
+// CHECK: util.func public @dupe_arg_caller_b(%[[B_ARG0:.+]]: index)
+util.func public @dupe_arg_caller_b(%arg0: index) -> index {
+ // CHECK: %[[B_RET0:.+]] = util.call @dupe_arg_callee(%[[B_ARG0]], %[[B_ARG0]]) : (index, index) -> index
+ %ret0 = util.call @dupe_arg_callee(%arg0, %arg0, %arg0) : (index, index, index) -> index
+ // CHECK: util.return %[[B_RET0]]
+ util.return %ret0 : index
}
// -----
@@ -103,203 +103,203 @@
// that base argument stays live. Note that %arg0 is not used in the callee
// but a duplicate of it is.
-// CHECK-LABEL: func.func private @dupe_unused_arg_callee
+// CHECK-LABEL: util.func private @dupe_unused_arg_callee
// CHECK-SAME: (%[[CALLEE_ARG0:.+]]: index) -> index
-func.func private @dupe_unused_arg_callee(%arg0: index, %arg0_dupe: index) -> (index, index) {
+util.func private @dupe_unused_arg_callee(%arg0: index, %arg0_dupe: index) -> (index, index) {
// CHECK: %[[CALLEE_RET0:.+]] = arith.addi %[[CALLEE_ARG0]], %[[CALLEE_ARG0]]
%ret0 = arith.addi %arg0_dupe, %arg0_dupe : index
- // CHECK: return %[[CALLEE_RET0]]
- return %ret0, %arg0 : index, index
+ // CHECK: util.return %[[CALLEE_RET0]]
+ util.return %ret0, %arg0 : index, index
}
-// CHECK: func.func @dupe_unused_arg_caller(%[[CALLER_ARG0:.+]]: index)
-func.func @dupe_unused_arg_caller(%arg0: index) -> (index, index) {
- // CHECK: %[[CALLER_RET0:.+]] = call @dupe_unused_arg_callee(%[[CALLER_ARG0]]) : (index) -> index
- %ret:2 = call @dupe_unused_arg_callee(%arg0, %arg0) : (index, index) -> (index, index)
- // CHECK: return %[[CALLER_RET0]], %[[CALLER_ARG0]]
- return %ret#0, %ret#1 : index, index
+// CHECK: util.func public @dupe_unused_arg_caller(%[[CALLER_ARG0:.+]]: index)
+util.func public @dupe_unused_arg_caller(%arg0: index) -> (index, index) {
+ // CHECK: %[[CALLER_RET0:.+]] = util.call @dupe_unused_arg_callee(%[[CALLER_ARG0]]) : (index) -> index
+ %ret:2 = util.call @dupe_unused_arg_callee(%arg0, %arg0) : (index, index) -> (index, index)
+ // CHECK: util.return %[[CALLER_RET0]], %[[CALLER_ARG0]]
+ util.return %ret#0, %ret#1 : index, index
}
// -----
// Tests that uniformly duplicate results get combined.
-// CHECK-LABEL: func.func private @dupe_result_callee
+// CHECK-LABEL: util.func private @dupe_result_callee
// CHECK-SAME: (%[[ARG0:.+]]: i1, %[[ARG1:.+]]: index) -> (index, index)
-func.func private @dupe_result_callee(%arg0: i1, %arg1: index) -> (index, index, index) {
+util.func private @dupe_result_callee(%arg0: i1, %arg1: index) -> (index, index, index) {
// CHECK: %[[ADD0:.+]] = arith.addi %[[ARG1]], %[[ARG1]]
%add0 = arith.addi %arg1, %arg1 : index
// CHECK: %[[ADD1:.+]] = arith.addi %[[ADD0]], %[[ARG1]]
%add1 = arith.addi %add0, %arg1 : index
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
- // CHECK: return %[[ADD0]], %[[ADD0]]
- return %add0, %add0, %add0 : index, index, index
+ // CHECK: util.return %[[ADD0]], %[[ADD0]]
+ util.return %add0, %add0, %add0 : index, index, index
^bb2:
- // CHECK: return %[[ADD0]], %[[ADD1]]
- return %add0, %add1, %add0 : index, index, index
+ // CHECK: util.return %[[ADD0]], %[[ADD1]]
+ util.return %add0, %add1, %add0 : index, index, index
}
-// CHECK: func.func @dupe_result_caller(%[[ARG0:.+]]: i1, %[[ARG1:.+]]: index)
-func.func @dupe_result_caller(%arg0: i1, %arg1: index) -> (index, index, index) {
- // CHECK: %[[RET:.+]]:2 = call @dupe_result_callee(%[[ARG0]], %[[ARG1]]) : (i1, index) -> (index, index)
- %ret:3 = call @dupe_result_callee(%arg0, %arg1) : (i1, index) -> (index, index, index)
- // CHECK: return %[[RET]]#0, %[[RET]]#1, %[[RET]]#0
- return %ret#0, %ret#1, %ret#2 : index, index, index
+// CHECK: util.func public @dupe_result_caller(%[[ARG0:.+]]: i1, %[[ARG1:.+]]: index)
+util.func public @dupe_result_caller(%arg0: i1, %arg1: index) -> (index, index, index) {
+ // CHECK: %[[RET:.+]]:2 = util.call @dupe_result_callee(%[[ARG0]], %[[ARG1]]) : (i1, index) -> (index, index)
+ %ret:3 = util.call @dupe_result_callee(%arg0, %arg1) : (i1, index) -> (index, index, index)
+ // CHECK: util.return %[[RET]]#0, %[[RET]]#1, %[[RET]]#0
+ util.return %ret#0, %ret#1, %ret#2 : index, index, index
}
// -----
// Tests that uniformly constant args get inlined into callees.
-// CHECK-LABEL: func.func private @uniform_arg_callee
+// CHECK-LABEL: util.func private @uniform_arg_callee
// CHECK-SAME: () -> index
-func.func private @uniform_arg_callee(%arg0: index) -> index {
+util.func private @uniform_arg_callee(%arg0: index) -> index {
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[ADD:.+]] = arith.addi %[[C1]], %[[C1]]
%add = arith.addi %arg0, %arg0 : index
- // CHECK: return %[[ADD]]
- return %add : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
}
-// CHECK: func.func @uniform_arg_caller_a
-func.func @uniform_arg_caller_a() -> (index, index) {
+// CHECK: util.func public @uniform_arg_caller_a
+util.func public @uniform_arg_caller_a() -> (index, index) {
%c1 = arith.constant 1 : index
- // CHECK: %[[A_RET0:.+]] = call @uniform_arg_callee() : () -> index
- %ret0 = call @uniform_arg_callee(%c1) : (index) -> index
- // CHECK: %[[A_RET1:.+]] = call @uniform_arg_callee() : () -> index
- %ret1 = call @uniform_arg_callee(%c1) : (index) -> index
- // CHECK: return %[[A_RET0]], %[[A_RET1]]
- return %ret0, %ret1 : index, index
+ // CHECK: %[[A_RET0:.+]] = util.call @uniform_arg_callee() : () -> index
+ %ret0 = util.call @uniform_arg_callee(%c1) : (index) -> index
+ // CHECK: %[[A_RET1:.+]] = util.call @uniform_arg_callee() : () -> index
+ %ret1 = util.call @uniform_arg_callee(%c1) : (index) -> index
+ // CHECK: util.return %[[A_RET0]], %[[A_RET1]]
+ util.return %ret0, %ret1 : index, index
}
-// CHECK: func.func @uniform_arg_caller_b
-func.func @uniform_arg_caller_b() -> index {
+// CHECK: util.func public @uniform_arg_caller_b
+util.func public @uniform_arg_caller_b() -> index {
%c1 = arith.constant 1 : index
- // CHECK: %[[B_RET0:.+]] = call @uniform_arg_callee() : () -> index
- %ret0 = call @uniform_arg_callee(%c1) : (index) -> index
- // CHECK: return %[[B_RET0]]
- return %ret0 : index
+ // CHECK: %[[B_RET0:.+]] = util.call @uniform_arg_callee() : () -> index
+ %ret0 = util.call @uniform_arg_callee(%c1) : (index) -> index
+ // CHECK: util.return %[[B_RET0]]
+ util.return %ret0 : index
}
// -----
// Tests that uniformly constant results get inlined into callers.
-// CHECK-LABEL: func.func private @uniform_result_callee
+// CHECK-LABEL: util.func private @uniform_result_callee
// CHECK-SAME: (%[[ARG0:.+]]: i1)
-func.func private @uniform_result_callee(%arg0: i1) -> index {
+util.func private @uniform_result_callee(%arg0: i1) -> index {
%c0 = arith.constant 0 : index
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
- // CHECK: return
- return %c0 : index
+ // CHECK: util.return
+ util.return %c0 : index
^bb2:
- // CHECK: return
- return %c0 : index
+ // CHECK: util.return
+ util.return %c0 : index
}
-// CHECK: func.func @uniform_result_caller(%[[ARG0:.+]]: i1)
-func.func @uniform_result_caller(%arg0: i1) -> index {
+// CHECK: util.func public @uniform_result_caller(%[[ARG0:.+]]: i1)
+util.func public @uniform_result_caller(%arg0: i1) -> index {
// CHECK: call @uniform_result_callee(%[[ARG0]]) : (i1) -> ()
- %ret0 = call @uniform_result_callee(%arg0) : (i1) -> index
+ %ret0 = util.call @uniform_result_callee(%arg0) : (i1) -> index
// CHECK: %[[C0:.+]] = arith.constant 0
- // CHECK: return %[[C0]]
- return %ret0 : index
+ // CHECK: util.return %[[C0]]
+ util.return %ret0 : index
}
// -----
// Tests that uniformly duplicate constant results get combined/inlined.
-// CHECK-LABEL: func.func private @dupe_constant_result_callee
+// CHECK-LABEL: util.func private @dupe_constant_result_callee
// CHECK-SAME: (%[[ARG0:.+]]: i1) -> index
-func.func private @dupe_constant_result_callee(%arg0: i1) -> (index, index, index) {
+util.func private @dupe_constant_result_callee(%arg0: i1) -> (index, index, index) {
// CHECK: %[[C0:.+]] = arith.constant 0
%c0 = arith.constant 0 : index
// CHECK: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : index
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
- // CHECK: return %[[C0]]
- return %c0, %c0, %c0 : index, index, index
+ // CHECK: util.return %[[C0]]
+ util.return %c0, %c0, %c0 : index, index, index
^bb2:
- // CHECK: return %[[C1]]
- return %c0, %c1, %c0 : index, index, index
+ // CHECK: util.return %[[C1]]
+ util.return %c0, %c1, %c0 : index, index, index
}
-// CHECK: func.func @dupe_constant_result_caller(%[[ARG0:.+]]: i1)
-func.func @dupe_constant_result_caller(%arg0: i1) -> (index, index, index) {
- // CHECK: %[[RET:.+]] = call @dupe_constant_result_callee(%[[ARG0]]) : (i1) -> index
- %ret:3 = call @dupe_constant_result_callee(%arg0) : (i1) -> (index, index, index)
+// CHECK: util.func public @dupe_constant_result_caller(%[[ARG0:.+]]: i1)
+util.func public @dupe_constant_result_caller(%arg0: i1) -> (index, index, index) {
+ // CHECK: %[[RET:.+]] = util.call @dupe_constant_result_callee(%[[ARG0]]) : (i1) -> index
+ %ret:3 = util.call @dupe_constant_result_callee(%arg0) : (i1) -> (index, index, index)
// CHECK: %[[C0_INLINE:.+]] = arith.constant 0
// CHECK-NEXT: %[[C0_INLINE_DUPE:.+]] = arith.constant 0
- // CHECK: return %[[C0_INLINE]], %[[RET]], %[[C0_INLINE_DUPE]]
- return %ret#0, %ret#1, %ret#2 : index, index, index
+ // CHECK: util.return %[[C0_INLINE]], %[[RET]], %[[C0_INLINE_DUPE]]
+ util.return %ret#0, %ret#1, %ret#2 : index, index, index
}
// -----
// Tests that public functions are unmodified (the unused arg is not dropped).
-// CHECK-LABEL: func.func public @public_unused_arg
+// CHECK-LABEL: util.func public @public_unused_arg
// CHECK-SAME: (%[[ARG0:.+]]: index)
-func.func public @public_unused_arg(%arg0: index) {
- return
+util.func public @public_unused_arg(%arg0: index) {
+ util.return
}
// -----
// Tests that non-uniform call args don't get optimized.
-// CHECK-LABEL: func.func private @nonuniform_arg_callee
+// CHECK-LABEL: util.func private @nonuniform_arg_callee
// CHECK-SAME: (%[[ARG0:.+]]: index) -> index
-func.func private @nonuniform_arg_callee(%arg0: index) -> index {
+util.func private @nonuniform_arg_callee(%arg0: index) -> index {
// CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
%add = arith.addi %arg0, %arg0 : index
- // CHECK: return %[[ADD]]
- return %add : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
}
-// CHECK: func.func @nonuniform_arg_caller_a(%[[A_ARG0:.+]]: index)
-func.func @nonuniform_arg_caller_a(%arg0: index) -> (index, index) {
- // CHECK: %[[A_RET0:.+]] = call @nonuniform_arg_callee(%[[A_ARG0]]) : (index) -> index
- %ret0 = call @nonuniform_arg_callee(%arg0) : (index) -> index
- // CHECK: %[[A_RET1:.+]] = call @nonuniform_arg_callee(%[[A_ARG0]]) : (index) -> index
- %ret1 = call @nonuniform_arg_callee(%arg0) : (index) -> index
- // CHECK: return %[[A_RET0]], %[[A_RET1]]
- return %ret0, %ret1 : index, index
+// CHECK: util.func public @nonuniform_arg_caller_a(%[[A_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller_a(%arg0: index) -> (index, index) {
+ // CHECK: %[[A_RET0:.+]] = util.call @nonuniform_arg_callee(%[[A_ARG0]]) : (index) -> index
+ %ret0 = util.call @nonuniform_arg_callee(%arg0) : (index) -> index
+ // CHECK: %[[A_RET1:.+]] = util.call @nonuniform_arg_callee(%[[A_ARG0]]) : (index) -> index
+ %ret1 = util.call @nonuniform_arg_callee(%arg0) : (index) -> index
+ // CHECK: util.return %[[A_RET0]], %[[A_RET1]]
+ util.return %ret0, %ret1 : index, index
}
-// CHECK: func.func @nonuniform_arg_caller_b(%[[B_ARG0:.+]]: index)
-func.func @nonuniform_arg_caller_b(%arg0: index) -> index {
- // CHECK: %[[B_RET0:.+]] = call @nonuniform_arg_callee(%[[B_ARG0]]) : (index) -> index
- %ret0 = call @nonuniform_arg_callee(%arg0) : (index) -> index
- // CHECK: return %[[B_RET0]]
- return %ret0 : index
+// CHECK: util.func public @nonuniform_arg_caller_b(%[[B_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller_b(%arg0: index) -> index {
+ // CHECK: %[[B_RET0:.+]] = util.call @nonuniform_arg_callee(%[[B_ARG0]]) : (index) -> index
+ %ret0 = util.call @nonuniform_arg_callee(%arg0) : (index) -> index
+ // CHECK: util.return %[[B_RET0]]
+ util.return %ret0 : index
}
// -----
// Tests that non-uniform call args w/ constants don't get optimized.
-// CHECK-LABEL: func.func private @nonuniform_constant_arg_callee
+// CHECK-LABEL: util.func private @nonuniform_constant_arg_callee
// CHECK-SAME: (%[[ARG0:.+]]: index) -> index
-func.func private @nonuniform_constant_arg_callee(%arg0: index) -> index {
+util.func private @nonuniform_constant_arg_callee(%arg0: index) -> index {
// CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
%add = arith.addi %arg0, %arg0 : index
- // CHECK: return %[[ADD]]
- return %add : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
}
-// CHECK: func.func @nonuniform_arg_caller(%[[CALLER_ARG0:.+]]: index)
-func.func @nonuniform_arg_caller(%arg0: index) -> (index, index) {
+// CHECK: util.func public @nonuniform_arg_caller(%[[CALLER_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller(%arg0: index) -> (index, index) {
// CHECK-DAG: %[[C10:.+]] = arith.constant 10
%c10 = arith.constant 10 : index
- // CHECK: %[[RET0:.+]] = call @nonuniform_constant_arg_callee(%[[CALLER_ARG0]]) : (index) -> index
- %ret0 = call @nonuniform_constant_arg_callee(%arg0) : (index) -> index
- // CHECK: %[[RET1:.+]] = call @nonuniform_constant_arg_callee(%[[C10]]) : (index) -> index
- %ret1 = call @nonuniform_constant_arg_callee(%c10) : (index) -> index
- // CHECK: return %[[RET0]], %[[RET1]]
- return %ret0, %ret1 : index, index
+ // CHECK: %[[RET0:.+]] = util.call @nonuniform_constant_arg_callee(%[[CALLER_ARG0]]) : (index) -> index
+ %ret0 = util.call @nonuniform_constant_arg_callee(%arg0) : (index) -> index
+ // CHECK: %[[RET1:.+]] = util.call @nonuniform_constant_arg_callee(%[[C10]]) : (index) -> index
+ %ret1 = util.call @nonuniform_constant_arg_callee(%c10) : (index) -> index
+ // CHECK: util.return %[[RET0]], %[[RET1]]
+ util.return %ret0, %ret1 : index, index
}
// -----
@@ -307,53 +307,104 @@
// Tests that non-uniform call args w/ constants don't get optimized (order
// flipped from above).
-// CHECK-LABEL: func.func private @nonuniform_constant_arg_callee_flipped
+// CHECK-LABEL: util.func private @nonuniform_constant_arg_callee_flipped
// CHECK-SAME: (%[[ARG0:.+]]: index) -> index
-func.func private @nonuniform_constant_arg_callee_flipped(%arg0: index) -> index {
+util.func private @nonuniform_constant_arg_callee_flipped(%arg0: index) -> index {
// CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
%add = arith.addi %arg0, %arg0 : index
- // CHECK: return %[[ADD]]
- return %add : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
}
-// CHECK: func.func @nonuniform_arg_caller_flipped(%[[CALLER_ARG0:.+]]: index)
-func.func @nonuniform_arg_caller_flipped(%arg0: index) -> (index, index) {
+// CHECK: util.func public @nonuniform_arg_caller_flipped(%[[CALLER_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller_flipped(%arg0: index) -> (index, index) {
// CHECK-DAG: %[[C10:.+]] = arith.constant 10
%c10 = arith.constant 10 : index
- // CHECK: %[[RET0:.+]] = call @nonuniform_constant_arg_callee_flipped(%[[C10]]) : (index) -> index
- %ret0 = call @nonuniform_constant_arg_callee_flipped(%c10) : (index) -> index
- // CHECK: %[[RET1:.+]] = call @nonuniform_constant_arg_callee_flipped(%[[CALLER_ARG0]]) : (index) -> index
- %ret1 = call @nonuniform_constant_arg_callee_flipped(%arg0) : (index) -> index
- // CHECK: return %[[RET0]], %[[RET1]]
- return %ret0, %ret1 : index, index
+ // CHECK: %[[RET0:.+]] = util.call @nonuniform_constant_arg_callee_flipped(%[[C10]]) : (index) -> index
+ %ret0 = util.call @nonuniform_constant_arg_callee_flipped(%c10) : (index) -> index
+ // CHECK: %[[RET1:.+]] = util.call @nonuniform_constant_arg_callee_flipped(%[[CALLER_ARG0]]) : (index) -> index
+ %ret1 = util.call @nonuniform_constant_arg_callee_flipped(%arg0) : (index) -> index
+ // CHECK: util.return %[[RET0]], %[[RET1]]
+ util.return %ret0, %ret1 : index, index
+}
+
+// -----
+
+// Tests that non-uniform call args w/ constants don't get optimized.
+
+// CHECK-LABEL: util.func private @nonuniform_constant_arg_callee
+// CHECK-SAME: (%[[ARG0:.+]]: index) -> index
+util.func private @nonuniform_constant_arg_callee(%arg0: index) -> index {
+ // CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
+ %add = arith.addi %arg0, %arg0 : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
+}
+
+// CHECK: util.func public @nonuniform_arg_caller(%[[CALLER_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller(%arg0: index) -> (index, index) {
+ // CHECK-DAG: %[[C10:.+]] = arith.constant 10
+ %c10 = arith.constant 10 : index
+ // CHECK: %[[RET0:.+]] = util.call @nonuniform_constant_arg_callee(%[[CALLER_ARG0]]) : (index) -> index
+ %ret0 = util.call @nonuniform_constant_arg_callee(%arg0) : (index) -> index
+ // CHECK: %[[RET1:.+]] = util.call @nonuniform_constant_arg_callee(%[[C10]]) : (index) -> index
+ %ret1 = util.call @nonuniform_constant_arg_callee(%c10) : (index) -> index
+ // CHECK: util.return %[[RET0]], %[[RET1]]
+ util.return %ret0, %ret1 : index, index
+}
+
+// -----
+
+// Tests that non-uniform call args w/ constants don't get optimized (order
+// flipped from above).
+
+// CHECK-LABEL: util.func private @nonuniform_constant_arg_callee_flipped
+// CHECK-SAME: (%[[ARG0:.+]]: index) -> index
+util.func private @nonuniform_constant_arg_callee_flipped(%arg0: index) -> index {
+ // CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
+ %add = arith.addi %arg0, %arg0 : index
+ // CHECK: util.return %[[ADD]]
+ util.return %add : index
+}
+
+// CHECK: util.func public @nonuniform_arg_caller_flipped(%[[CALLER_ARG0:.+]]: index)
+util.func public @nonuniform_arg_caller_flipped(%arg0: index) -> (index, index) {
+ // CHECK-DAG: %[[C10:.+]] = arith.constant 10
+ %c10 = arith.constant 10 : index
+ // CHECK: %[[RET0:.+]] = util.call @nonuniform_constant_arg_callee_flipped(%[[C10]]) : (index) -> index
+ %ret0 = util.call @nonuniform_constant_arg_callee_flipped(%c10) : (index) -> index
+ // CHECK: %[[RET1:.+]] = util.call @nonuniform_constant_arg_callee_flipped(%[[CALLER_ARG0]]) : (index) -> index
+ %ret1 = util.call @nonuniform_constant_arg_callee_flipped(%arg0) : (index) -> index
+ // CHECK: util.return %[[RET0]], %[[RET1]]
+ util.return %ret0, %ret1 : index, index
}
// -----
// Tests that non-uniform call results don't get optimized.
-// CHECK-LABEL: func.func private @nonuniform_result_callee
+// CHECK-LABEL: util.func private @nonuniform_result_callee
// CHECK-SAME: (%[[ARG0:.+]]: i1) -> index
-func.func private @nonuniform_result_callee(%arg0: i1) -> index {
+util.func private @nonuniform_result_callee(%arg0: i1) -> index {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
// CHECK: %[[C0:.+]] = arith.constant 0
%c0 = arith.constant 0 : index
- // CHECK: return %[[C0]]
- return %c0 : index
+ // CHECK: util.return %[[C0]]
+ util.return %c0 : index
^bb2:
// CHECK: %[[C1:.+]] = arith.constant 1
%c1 = arith.constant 1 : index
- // CHECK: return %[[C1]]
- return %c1 : index
+ // CHECK: util.return %[[C1]]
+ util.return %c1 : index
}
-// CHECK: func.func @nonuniform_result_caller(%[[ARG0:.+]]: i1)
-func.func @nonuniform_result_caller(%arg0: i1) -> index {
- // CHECK: %[[RET0:.+]] = call @nonuniform_result_callee(%[[ARG0]]) : (i1) -> index
- %ret0 = call @nonuniform_result_callee(%arg0) : (i1) -> index
- // CHECK: return %[[RET0]]
- return %ret0 : index
+// CHECK: util.func public @nonuniform_result_caller(%[[ARG0:.+]]: i1)
+util.func public @nonuniform_result_caller(%arg0: i1) -> index {
+ // CHECK: %[[RET0:.+]] = util.call @nonuniform_result_callee(%[[ARG0]]) : (i1) -> index
+ %ret0 = util.call @nonuniform_result_callee(%arg0) : (i1) -> index
+ // CHECK: util.return %[[RET0]]
+ util.return %ret0 : index
}
// -----
@@ -361,18 +412,18 @@
// Tests that args that directly pass-through to results get hoisted out into
// the caller.
-// CHECK-LABEL: func.func private @passthrough_callee() {
-func.func private @passthrough_callee(%arg0: index) -> index {
- // CHECK: return
- return %arg0 : index
+// CHECK-LABEL: util.func private @passthrough_callee() {
+util.func private @passthrough_callee(%arg0: index) -> index {
+ // CHECK: util.return
+ util.return %arg0 : index
}
-// CHECK: func.func @passthrough_caller(%[[ARG0:.+]]: index)
-func.func @passthrough_caller(%arg0: index) -> index {
+// CHECK: util.func public @passthrough_caller(%[[ARG0:.+]]: index)
+util.func public @passthrough_caller(%arg0: index) -> index {
// CHECK: call @passthrough_callee() : () -> ()
- %ret0 = call @passthrough_callee(%arg0) : (index) -> index
- // CHECK: return %[[ARG0]]
- return %ret0 : index
+ %ret0 = util.call @passthrough_callee(%arg0) : (index) -> index
+ // CHECK: util.return %[[ARG0]]
+ util.return %ret0 : index
}
// -----
@@ -380,19 +431,19 @@
// Tests that args that directly pass-through to results get hoisted out into
// the caller but they are preserved as args if they are used for other things.
-// CHECK-LABEL: func.func private @passthrough_preserve_arg_callee
+// CHECK-LABEL: util.func private @passthrough_preserve_arg_callee
// CHECK-SAME: (%[[ARG0:.+]]: index) -> index {
-func.func private @passthrough_preserve_arg_callee(%arg0: index) -> (index, index) {
+util.func private @passthrough_preserve_arg_callee(%arg0: index) -> (index, index) {
// CHECK: %[[ADD:.+]] = arith.addi %[[ARG0]], %[[ARG0]]
%add = arith.addi %arg0, %arg0 : index
- // CHECK: return %[[ADD]]
- return %arg0, %add : index, index
+ // CHECK: util.return %[[ADD]]
+ util.return %arg0, %add : index, index
}
-// CHECK: func.func @passthrough_preserve_arg_caller(%[[ARG0:.+]]: index)
-func.func @passthrough_preserve_arg_caller(%arg0: index) -> (index, index) {
- // CHECK: %[[RET1:.+]] = call @passthrough_preserve_arg_callee(%[[ARG0]]) : (index) -> index
- %ret:2 = call @passthrough_preserve_arg_callee(%arg0) : (index) -> (index, index)
- // CHECK: return %[[ARG0]], %[[RET1]]
- return %ret#0, %ret#1 : index, index
+// CHECK: util.func public @passthrough_preserve_arg_caller(%[[ARG0:.+]]: index)
+util.func public @passthrough_preserve_arg_caller(%arg0: index) -> (index, index) {
+ // CHECK: %[[RET1:.+]] = util.call @passthrough_preserve_arg_callee(%[[ARG0]]) : (index) -> index
+ %ret:2 = util.call @passthrough_preserve_arg_callee(%arg0) : (index) -> (index, index)
+ // CHECK: util.return %[[ARG0]], %[[RET1]]
+ util.return %ret#0, %ret#1 : index, index
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/outline_constants.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/outline_constants.mlir
index b43f0fb..76b27da 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/outline_constants.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/outline_constants.mlir
@@ -1,19 +1,19 @@
// RUN: iree-opt --split-input-file --iree-util-outline-constants %s | FileCheck %s
// CHECK-LABEL: @scalarConstant
-func.func @scalarConstant() {
+util.func @scalarConstant() {
// CHECK: = arith.constant 0 : i32
%cst = arith.constant 0 : i32
- return
+ util.return
}
// -----
// CHECK-LABEL: @splatConstant
-func.func @splatConstant() {
+util.func @splatConstant() {
// CHECK: = arith.constant dense<1.200000e+00> : tensor<512x128xf32>
%cst = arith.constant dense<1.2> : tensor<512x128xf32>
- return
+ util.return
}
// -----
@@ -21,10 +21,10 @@
// CHECK: util.global private @_constant {inlining_policy = #util.inline.never} = dense<[0.0287729427, 0.0297581609]> : tensor<2xf32>
// CHECK-NEXT: util.global private @_constant_0 {inlining_policy = #util.inline.never} = dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00]> : tensor<8xf32>
// CHECK-LABEL: @denseConstants
-func.func @denseConstants() {
+util.func @denseConstants() {
// CHECK: = util.global.load @_constant : tensor<2xf32>
%cst_0 = arith.constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32>
// CHECK-NEXT: = util.global.load @_constant_0 : tensor<8xf32>
%cst_1 = arith.constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> : tensor<8xf32>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/patterns.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/patterns.mlir
index cac5804..becfa2a 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/patterns.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/patterns.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @foldBrArguments
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG1:.+]]: index)
-func.func @foldBrArguments(%cond: i1, %arg1: index) -> index {
+util.func @foldBrArguments(%cond: i1, %arg1: index) -> index {
// CHECK: cf.cond_br %[[COND]]
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
@@ -19,15 +19,15 @@
^bb3(%bb3_0: index, %bb3_1: index, %bb3_2: index, %bb3_3: index):
// CHECK: %[[OP3:.+]] = "some.op3"(%[[BB3_ARG0]], %[[BB3_ARG1]], %[[BB3_ARG2]], %[[BB3_ARG1]])
%2 = "some.op3"(%bb3_0, %bb3_1, %bb3_2, %bb3_3) : (index, index, index, index) -> index
- // CHECK: return %[[OP3]]
- return %2 : index
+ // CHECK: util.return %[[OP3]]
+ util.return %2 : index
}
// -----
// CHECK-LABEL: @foldCondBrArguments
// CHECK-SAME: (%[[COND:.+]]: i1, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index)
-func.func @foldCondBrArguments(%cond: i1, %arg1: index, %arg2: index) -> index {
+util.func @foldCondBrArguments(%cond: i1, %arg1: index, %arg2: index) -> index {
// CHECK: cf.cond_br %[[COND]], ^bb1, ^bb2
cf.cond_br %cond, ^bb1(%arg1, %arg2, %arg2 : index, index, index),
^bb2(%arg1, %arg1, %arg2 : index, index, index)
@@ -36,20 +36,20 @@
// CHECK: %[[OP1:.+]] = "some.op1"(%[[ARG1]], %[[ARG2]], %[[ARG2]])
%0 = "some.op1"(%bb1_0, %bb1_1, %bb1_2) : (index, index, index) -> index
// CHECK: %[[OP1]]
- return %0 : index
+ util.return %0 : index
// CHECK: ^bb2:
^bb2(%bb2_0: index, %bb2_1: index, %bb2_2: index):
// CHECK: %[[OP2:.+]] = "some.op2"(%[[ARG1]], %[[ARG1]], %[[ARG2]])
%1 = "some.op2"(%bb2_0, %bb2_1, %bb2_2) : (index, index, index) -> index
- // CHECK: return %[[OP2]]
- return %1 : index
+ // CHECK: util.return %[[OP2]]
+ util.return %1 : index
}
// -----
// CHECK-LABEL: @elideBranchOperands
// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: index)
-func.func @elideBranchOperands(%arg0: index, %arg1: index) -> i32 {
+util.func @elideBranchOperands(%arg0: index, %arg1: index) -> i32 {
// CHECK-DAG: %[[C5I32:.+]] = arith.constant 5 : i32
// CHECK-DAG: %[[C1I32:.+]] = arith.constant 1 : i32
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -75,15 +75,15 @@
cf.br ^loopHeader(%newValue, %newCounter, %bodyMax : i32, index, index)
// CHECK: ^bb3:
^exit(%finalValue: i32):
- // CHECK: return %[[BB1_ARG0]]
- return %finalValue : i32
+ // CHECK: util.return %[[BB1_ARG0]]
+ util.return %finalValue : i32
}
// -----
// CHECK-LABEL: @indexSwitchToIf
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @indexSwitchToIf(%case: index) -> (i32, i64) {
+util.func @indexSwitchToIf(%case: index) -> (i32, i64) {
// CHECK-NOT: scf.index_switch
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[IS_CASE_1:.+]] = arith.cmpi eq, %[[CASE]], %[[C1]]
@@ -106,15 +106,15 @@
// CHECK-NEXT: scf.yield %[[DEFAULT_A]], %[[DEFAULT_B]]
scf.yield %default_a, %default_b : i32, i64
}
- // CHECK: return %[[RESULTS]]#0, %[[RESULTS]]#1
- return %results#0, %results#1 : i32, i64
+ // CHECK: util.return %[[RESULTS]]#0, %[[RESULTS]]#1
+ util.return %results#0, %results#1 : i32, i64
}
// -----
// CHECK-LABEL: @indexSwitchToIfNoResult
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @indexSwitchToIfNoResult(%case: index) {
+util.func @indexSwitchToIfNoResult(%case: index) {
// CHECK-NOT: scf.index_switch
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[IS_CASE_1:.+]] = arith.cmpi eq, %[[CASE]], %[[C1]]
@@ -131,15 +131,15 @@
"some.op.default"() : () -> ()
scf.yield
}
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
// CHECK-LABEL: @indexSwitchToIfNoDefault
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @indexSwitchToIfNoDefault(%case: index) {
+util.func @indexSwitchToIfNoDefault(%case: index) {
// CHECK-NOT: scf.index_switch
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[IS_CASE_1:.+]] = arith.cmpi eq, %[[CASE]], %[[C1]]
@@ -153,15 +153,15 @@
// CHECK-NOT: } else {
default {
}
- // CHECK: return
- return
+ // CHECK: util.return
+ util.return
}
// -----
// CHECK-LABEL: @mergeIndexSwitches
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @mergeIndexSwitches(%case: index) -> (i32, i32) {
+util.func @mergeIndexSwitches(%case: index) -> (i32, i32) {
// CHECK: %[[RESULTS:.+]]:2 = scf.index_switch %[[CASE]] -> i32, i32
%result0 = scf.index_switch %case -> i32
// CHECK-NEXT: case 0 {
@@ -204,15 +204,15 @@
%default = "some.op1.default"(%result0) : (i32) -> i32
scf.yield %default : i32
}
- // CHECK: return %[[RESULTS]]#0, %[[RESULTS]]#1
- return %result0, %result1 : i32, i32
+ // CHECK: util.return %[[RESULTS]]#0, %[[RESULTS]]#1
+ util.return %result0, %result1 : i32, i32
}
// -----
// CHECK-LABEL: @mergeIndexSwitchesNoResult
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @mergeIndexSwitchesNoResult(%case: index) {
+util.func @mergeIndexSwitchesNoResult(%case: index) {
// CHECK: scf.index_switch %[[CASE]]
scf.index_switch %case
// CHECK-NEXT: case 0 {
@@ -253,14 +253,14 @@
default {
"some.op1.default"() : () -> ()
}
- return
+ util.return
}
// -----
// CHECK-LABEL: @mergeIndexSwitchesIntoEmptyDefault
// CHECK-SAME: (%[[CASE:.+]]: index)
-func.func @mergeIndexSwitchesIntoEmptyDefault(%case: index) {
+util.func @mergeIndexSwitchesIntoEmptyDefault(%case: index) {
// CHECK: scf.index_switch %[[CASE]]
scf.index_switch %case
// CHECK-NEXT: case 0 {
@@ -295,5 +295,5 @@
}
default {
}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_bf16_to_f32.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_bf16_to_f32.mlir
index be8edc0..9080f63 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_bf16_to_f32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_bf16_to_f32.mlir
@@ -1,26 +1,26 @@
// RUN: iree-opt --split-input-file --iree-util-promote-bf16-to-f32 %s | FileCheck %s
// CHECK: util.global {{.*}} : tensor<4xf32>
-// CHECK-LABEL: func.func @simple_bf16() -> tensor<4xf32>
+// CHECK-LABEL: util.func public @simple_bf16() -> tensor<4xf32>
// CHECK-NEXT: %{{.*}} = util.global.address @__global : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: %{{.*}} = util.global.load.indirect %{{.*}} : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
-// CHECK-NEXT: return %{{.*}} : tensor<4xf32>
+// CHECK-NEXT: util.return %{{.*}} : tensor<4xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xbf16>
-func.func @simple_bf16() -> (tensor<4xbf16>) {
+util.func public @simple_bf16() -> (tensor<4xbf16>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xbf16>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xbf16>> -> tensor<4xbf16>
- return %1 : tensor<4xbf16>
+ util.return %1 : tensor<4xbf16>
}
// -----
// CHECK: util.global
// CHECK-NOT: bf16
-// CHECK-LABEL: func.func @nested_region_bf16()
+// CHECK-LABEL: util.func public @nested_region_bf16()
// CHECK-NOT: bf16
-// CHECK: return %{{.*}} : tensor<?xf32>
+// CHECK: util.return %{{.*}} : tensor<?xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xbf16>
-func.func @nested_region_bf16() -> (tensor<?xbf16>) {
+util.func public @nested_region_bf16() -> (tensor<?xbf16>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xbf16>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xbf16>> -> tensor<4xbf16>
%c4 = arith.constant 4 : index
@@ -29,5 +29,5 @@
%element = tensor.extract %1[%arg0] : tensor<4xbf16>
tensor.yield %element : bf16
} : tensor<?xbf16>
- return %2 : tensor<?xbf16>
+ util.return %2 : tensor<?xbf16>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_f16_to_f32.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_f16_to_f32.mlir
index e7ddbb5..40232c3 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_f16_to_f32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/promote_f16_to_f32.mlir
@@ -3,26 +3,26 @@
// NOTE: for more comprehensive tests see demote_i64_to_i32.mlir.
// CHECK: util.global {{.*}} : tensor<4xf32>
-// CHECK-LABEL: func.func @simple_f16() -> tensor<4xf32>
+// CHECK-LABEL: util.func public @simple_f16() -> tensor<4xf32>
// CHECK-NEXT: %{{.*}} = util.global.address @__global : !util.ptr<tensor<4xf32>>
// CHECK-NEXT: %{{.*}} = util.global.load.indirect %{{.*}} : !util.ptr<tensor<4xf32>> -> tensor<4xf32>
-// CHECK-NEXT: return %{{.*}} : tensor<4xf32>
+// CHECK-NEXT: util.return %{{.*}} : tensor<4xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf16>
-func.func @simple_f16() -> (tensor<4xf16>) {
+util.func public @simple_f16() -> (tensor<4xf16>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf16>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf16>> -> tensor<4xf16>
- return %1 : tensor<4xf16>
+ util.return %1 : tensor<4xf16>
}
// -----
// CHECK: util.global
// CHECK-NOT: f16
-// CHECK-LABEL: func.func @nested_region_f16()
+// CHECK-LABEL: util.func public @nested_region_f16()
// CHECK-NOT: f16
-// CHECK: return %{{.*}} : tensor<?xf32>
+// CHECK: util.return %{{.*}} : tensor<?xf32>
util.global private @"__global" = dense<[1.000000e+01, 5.000000e+00, 1.000000e+01, 5.000000e+00]> : tensor<4xf16>
-func.func @nested_region_f16() -> (tensor<?xf16>) {
+util.func public @nested_region_f16() -> (tensor<?xf16>) {
%0 = util.global.address @"__global" : !util.ptr<tensor<4xf16>>
%1 = util.global.load.indirect %0 : !util.ptr<tensor<4xf16>> -> tensor<4xf16>
%c4 = arith.constant 4 : index
@@ -31,5 +31,5 @@
%element = tensor.extract %1[%arg0] : tensor<4xf16>
tensor.yield %element : f16
} : tensor<?xf16>
- return %2 : tensor<?xf16>
+ util.return %2 : tensor<?xf16>
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
index 81cbff3..c188fed 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
@@ -11,7 +11,7 @@
util.global private mutable @constantGlobal : !util.buffer
// CHECK-LABEL: @globalLoad
-func.func private @globalLoad() {
+util.func private @globalLoad() {
// CHECK-NEXT: %[[RESOURCE:.+]] = util.global.load @constantGlobal : !util.buffer
// CHECK-NEXT: %[[STORAGE_SIZE:.+]] = util.global.load @constantGlobal__storage_size : index
// CHECK-NEXT: %[[OFFSET:.+]] = util.global.load @constantGlobal__offset : index
@@ -20,7 +20,7 @@
%0 = util.global.load @constantGlobal : !util.buffer
// CHECK-NEXT: util.optimization_barrier %[[SUBRANGE]]
util.optimization_barrier %0 : !util.buffer
- return
+ util.return
}
// -----
@@ -37,13 +37,13 @@
// CHECK-LABEL: @globalStore
// CHECK-SAME: (%[[RESOURCE:.+]]: !util.buffer, %[[STORAGE_SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
-func.func private @globalStore(%resource: !util.buffer) {
+util.func private @globalStore(%resource: !util.buffer) {
// CHECK: util.global.store %[[RESOURCE]], @mutableGlobal : !util.buffer
// CHECK: util.global.store %[[STORAGE_SIZE]], @mutableGlobal__storage_size : index
// CHECK: util.global.store %[[OFFSET]], @mutableGlobal__offset : index
// CHECK: util.global.store %[[LENGTH]], @mutableGlobal__length : index
util.global.store %resource, @mutableGlobal : !util.buffer
- return
+ util.return
}
// -----
@@ -55,7 +55,7 @@
// CHECK-LABEL: @funcArgs
// CHECK-SAME: (%[[RESOURCE0:.+]]: !util.buffer, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !util.buffer, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @funcArgs(%resource0: !util.buffer, %resource1: !util.buffer) {
+util.func private @funcArgs(%resource0: !util.buffer, %resource1: !util.buffer) {
// CHECK-NEXT: %[[SUBRANGE0:.+]] = util.buffer.subspan %[[RESOURCE0]][%[[OFFSET0]]] : !util.buffer{%[[STORAGE_SIZE0]]} -> !util.buffer{%[[LENGTH0]]}
// CHECK-NEXT: %[[SUBRANGE1:.+]] = util.buffer.subspan %[[RESOURCE1]][%[[OFFSET1]]] : !util.buffer{%[[STORAGE_SIZE1]]} -> !util.buffer{%[[LENGTH1]]}
@@ -63,7 +63,8 @@
util.optimization_barrier %resource0 : !util.buffer
// CHECK-NEXT: util.optimization_barrier %[[SUBRANGE1]]
util.optimization_barrier %resource1 : !util.buffer
- return
+
+ util.return
}
// -----
@@ -76,13 +77,13 @@
// CHECK-LABEL: @funcResults
// CHECK-SAME: (%[[RESOURCE0:.+]]: !util.buffer, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !util.buffer, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
// CHECK-SAME: -> (!util.buffer, index, index, index, !util.buffer, index, index, index)
-func.func private @funcResults(%resource0: !util.buffer, %resource1: !util.buffer) -> (!util.buffer, !util.buffer) {
+util.func private @funcResults(%resource0: !util.buffer, %resource1: !util.buffer) -> (!util.buffer, !util.buffer) {
// NOTE: there will be extra stuff here from the arg insertion. Since the
// return should consume the subrange that was inserted we expect to directly
// use the function arguments.
- // CHECK: return %[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]], %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]]
- return %resource0, %resource1 : !util.buffer, !util.buffer
+ // CHECK: util.return %[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]], %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]]
+ util.return %resource0, %resource1 : !util.buffer, !util.buffer
}
@@ -92,9 +93,9 @@
// CHECK-LABEL: @publicFuncSignature
// CHECK-SAME: (%[[RESOURCE:.+]]: !util.buffer) -> !util.buffer
-func.func @publicFuncSignature(%resource: !util.buffer) -> !util.buffer {
- // CHECK-NEXT: return %[[RESOURCE]] : !util.buffer
- return %resource : !util.buffer
+util.func @publicFuncSignature(%resource: !util.buffer) -> !util.buffer {
+ // CHECK-NEXT: util.return %[[RESOURCE]] : !util.buffer
+ util.return %resource : !util.buffer
}
// -----
@@ -107,15 +108,15 @@
// CHECK-LABEL: @caller
// CHECK-SAME: (%[[RESOURCE0:.+]]: !util.buffer, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !util.buffer, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @caller(%resource0: !util.buffer, %resource1: !util.buffer) {
+util.func private @caller(%resource0: !util.buffer, %resource1: !util.buffer) {
// NOTE: there will be extra stuff here from the arg insertion. The call
// consumes the subranges and we expect the args to be passed directly.
- // CHECK: %[[RET:.+]]:8 = call @callee(%[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]],
+ // CHECK: %[[RET:.+]]:8 = util.call @callee(%[[RESOURCE0]], %[[STORAGE_SIZE0]], %[[OFFSET0]], %[[LENGTH0]],
// CHECK-SAME: %[[RESOURCE1]], %[[STORAGE_SIZE1]], %[[OFFSET1]], %[[LENGTH1]])
// CHECK-SAME: : (!util.buffer, index, index, index, !util.buffer, index, index, index)
// CHECK-SAME: -> (!util.buffer, index, index, index, !util.buffer, index, index, index)
- %0:2 = call @callee(%resource0, %resource1) : (!util.buffer, !util.buffer) -> (!util.buffer, !util.buffer)
+ %0:2 = util.call @callee(%resource0, %resource1) : (!util.buffer, !util.buffer) -> (!util.buffer, !util.buffer)
// CHECK-NEXT: %[[RET_SUBRANGE0:.+]] = util.buffer.subspan %[[RET]]#0[%[[RET]]#2] : !util.buffer{%[[RET]]#1} -> !util.buffer{%[[RET]]#3}
// CHECK-NEXT: %[[RET_SUBRANGE1:.+]] = util.buffer.subspan %[[RET]]#4[%[[RET]]#6] : !util.buffer{%[[RET]]#5} -> !util.buffer{%[[RET]]#7}
@@ -124,11 +125,11 @@
// CHECK-NEXT: util.optimization_barrier %[[RET_SUBRANGE1]] : !util.buffer
util.optimization_barrier %0#1 : !util.buffer
- return
+ util.return
}
-func.func private @callee(%arg0: !util.buffer, %arg1: !util.buffer) -> (!util.buffer, !util.buffer) {
- return %arg0, %arg1 : !util.buffer, !util.buffer
+util.func private @callee(%arg0: !util.buffer, %arg1: !util.buffer) -> (!util.buffer, !util.buffer) {
+ util.return %arg0, %arg1 : !util.buffer, !util.buffer
}
// -----
@@ -139,21 +140,21 @@
// CHECK-LABEL: @callerInSCF
// CHECK-SAME: (%[[RESOURCE:.+]]: !util.buffer, %[[STORAGE_SIZE:.+]]: index, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index, %[[COND:.+]]: i1)
-func.func private @callerInSCF(%resource: !util.buffer, %cond: i1) {
+util.func private @callerInSCF(%resource: !util.buffer, %cond: i1) {
// NOTE: there will be extra stuff here from the arg insertion. The call
// consumes the subranges and we expect the args to be passed directly.
// CHECK: scf.if %[[COND]]
scf.if %cond {
- // CHECK: func.call @callee(%[[RESOURCE]], %[[STORAGE_SIZE]], %[[OFFSET]], %[[LENGTH]])
- func.call @callee(%resource) : (!util.buffer) -> ()
+ // CHECK: util.call @callee(%[[RESOURCE]], %[[STORAGE_SIZE]], %[[OFFSET]], %[[LENGTH]])
+ util.call @callee(%resource) : (!util.buffer) -> ()
}
- return
+ util.return
}
-func.func private @callee(%arg0: !util.buffer) {
- return
+util.func private @callee(%arg0: !util.buffer) {
+ util.return
}
// -----
@@ -165,7 +166,7 @@
// CHECK-LABEL: @callerWithSubrange
// CHECK-SAME: (%[[ARG_RESOURCE:.+]]: !util.buffer, %[[ARG_SIZE:.+]]: index, %[[ARG_OFFSET:.+]]: index, %[[ARG_LENGTH:.+]]: index)
-func.func private @callerWithSubrange(%arg: !util.buffer) {
+util.func private @callerWithSubrange(%arg: !util.buffer) {
// NOTE: there will be extra stuff here from the arg insertion. The call
// consumes the subranges and we expect the args to be passed directly.
@@ -177,8 +178,8 @@
// CHECK-DAG: %[[ARG_ADJUSTED_OFFSET:.+]] = arith.addi %[[ARG_OFFSET]], %[[ARG_LOCAL_OFFSET]]
%arg_subspan = util.buffer.subspan %arg[%arg_offset] : !util.buffer{%arg_size} -> !util.buffer{%arg_length}
- // CHECK: %[[RET0:.+]]:4 = call @callee(%[[ARG_RESOURCE]], %[[ARG_SIZE]], %[[ARG_ADJUSTED_OFFSET]], %[[ARG_LOCAL_LENGTH]])
- %ret0 = call @callee(%arg_subspan) : (!util.buffer) -> (!util.buffer)
+ // CHECK: %[[RET0:.+]]:4 = util.call @callee(%[[ARG_RESOURCE]], %[[ARG_SIZE]], %[[ARG_ADJUSTED_OFFSET]], %[[ARG_LOCAL_LENGTH]])
+ %ret0 = util.call @callee(%arg_subspan) : (!util.buffer) -> (!util.buffer)
%ret0_size = util.buffer.size %ret0 : !util.buffer
// CHECK-DAG: %[[RET0_LOCAL_OFFSET:.+]] = arith.constant 300
@@ -188,18 +189,18 @@
// CHECK-DAG: %[[RET0_ADJUSTED_OFFSET:.+]] = arith.addi %[[RET0]]#2, %[[RET0_LOCAL_OFFSET]]
%ret0_subspan = util.buffer.subspan %ret0[%ret0_offset] : !util.buffer{%ret0_size} -> !util.buffer{%ret0_length}
- // CHECK: %[[RET1:.+]]:4 = call @callee(%[[RET0]]#0, %[[RET0]]#1, %[[RET0_ADJUSTED_OFFSET]], %[[RET0_LOCAL_LENGTH]])
- %ret1 = call @callee(%ret0_subspan) : (!util.buffer) -> (!util.buffer)
+ // CHECK: %[[RET1:.+]]:4 = util.call @callee(%[[RET0]]#0, %[[RET0]]#1, %[[RET0_ADJUSTED_OFFSET]], %[[RET0_LOCAL_LENGTH]])
+ %ret1 = util.call @callee(%ret0_subspan) : (!util.buffer) -> (!util.buffer)
// CHECK: %[[RET1_SUBRANGE:.+]] = util.buffer.subspan %[[RET1]]#0[%[[RET1]]#2] : !util.buffer{%[[RET1]]#1} -> !util.buffer{%[[RET1]]#3}
// CHECK-NEXT: util.optimization_barrier %[[RET1_SUBRANGE]] : !util.buffer
util.optimization_barrier %ret1 : !util.buffer
- return
+ util.return
}
-func.func private @callee(%arg0: !util.buffer) -> !util.buffer {
- return %arg0 : !util.buffer
+util.func private @callee(%arg0: !util.buffer) -> !util.buffer {
+ util.return %arg0 : !util.buffer
}
// -----
@@ -211,7 +212,7 @@
// CHECK-LABEL: @br
// CHECK-SAME: (%[[RESOURCE0:.+]]: !util.buffer, %[[STORAGE_SIZE0:.+]]: index, %[[OFFSET0:.+]]: index, %[[LENGTH0:.+]]: index, %[[RESOURCE1:.+]]: !util.buffer, %[[STORAGE_SIZE1:.+]]: index, %[[OFFSET1:.+]]: index, %[[LENGTH1:.+]]: index)
-func.func private @br(%resource0: !util.buffer, %resource1: !util.buffer) {
+util.func private @br(%resource0: !util.buffer, %resource1: !util.buffer) {
// NOTE: there will be extra stuff here from the arg insertion. The branch
// consumes the unready resources and we expect the args to be passed directly
// to the cf.br.
@@ -230,5 +231,5 @@
// CHECK-NEXT: util.optimization_barrier %[[BB1_SUBRANGE1]]
util.optimization_barrier %bb1_resource1 : !util.buffer
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/simplify_global_accesses.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/simplify_global_accesses.mlir
index 9e2077a..49abd98 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/simplify_global_accesses.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/simplify_global_accesses.mlir
@@ -1,10 +1,10 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-util-simplify-global-accesses))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-util-simplify-global-accesses))' %s | FileCheck %s
util.global private @varA = dense<1> : tensor<2xi32>
util.global private @varB = dense<3> : tensor<2x4xi32>
// CHECK-LABEL: @constants()
-func.func @constants() {
+util.func public @constants() {
// CHECK-DAG: constant 10
%w = arith.constant 10 : index
// CHECK-DAG: %[[VAR_A:.+]] = util.global.load @varA : tensor<2xi32>
@@ -15,7 +15,7 @@
%varB = util.global.load @varB : tensor<2x4xi32>
// CHECK-NEXT: flow.dispatch @ex::@dispatch1{{.+}}(%[[T]], %[[VAR_B]])
%d1 = flow.dispatch @ex::@dispatch1[%w](%d0, %varB) : (tensor<2xi32>, tensor<2x4xi32>) -> tensor<2xi32>
- return
+ util.return
}
// -----
@@ -24,7 +24,7 @@
util.global private @varB = 2 : i32
// CHECK-LABEL: @constants_in_cfg
-func.func @constants_in_cfg(%start: i32, %bound: i32) -> i32 {
+util.func public @constants_in_cfg(%start: i32, %bound: i32) -> i32 {
// CHECK-NEXT: %[[VAR_A:.+]] = util.global.load @varA : i32
// CHECK-NEXT: %[[VAR_B:.+]] = util.global.load @varB : i32
// CHECK-NEXT: cf.br ^bb1
@@ -47,8 +47,8 @@
%11 = util.global.load @varB : i32
// CHECK-NEXT: %[[T1:.+]] = arith.subi %[[T0]], %[[VAR_B]]
%12 = arith.subi %10, %11 : i32
- // CHECK-NEXT: return %[[T1]]
- return %12 : i32
+ // CHECK-NEXT: util.return %[[T1]]
+ util.return %12 : i32
}
// -----
@@ -57,7 +57,7 @@
util.global private @varB = dense<3> : tensor<2x4xi32>
// CHECK-LABEL: @mixed_mutability
-func.func @mixed_mutability() {
+util.func public @mixed_mutability() {
// CHECK-DAG: %[[VAR_A:.+]] = util.global.load @varA : tensor<2xi32>
// CHECK-DAG: %[[VAR_B:.+]] = util.global.load @varB : tensor<2x4xi32>
// CHECK-NEXT: constant 10
@@ -70,7 +70,7 @@
%d1 = flow.dispatch @ex::@dispatch1[%w](%d0, %varB) : (tensor<2xi32>, tensor<2x4xi32>) -> tensor<2xi32>
// CHECK-NEXT: util.global.store %[[T1]], @varA : tensor<2xi32>
util.global.store %d1, @varA : tensor<2xi32>
- return
+ util.return
}
// -----
@@ -78,14 +78,14 @@
util.global private mutable @varA = dense<1> : tensor<2xi32>
// CHECK-LABEL: @raw
-func.func @raw() {
+util.func public @raw() {
// CHECK: %[[T:.+]] = util.global.load @varA {id = 0
%varA_0 = util.global.load @varA {id = 0} : tensor<2xi32>
util.global.store %varA_0, @varA {id = 0} : tensor<2xi32>
%varA_1 = util.global.load @varA {id = 1} : tensor<2xi32>
// CHECK-NEXT: util.global.store %[[T]], @varA {id = 1
util.global.store %varA_1, @varA {id = 1} : tensor<2xi32>
- return
+ util.return
}
// -----
@@ -93,12 +93,12 @@
util.global private mutable @varA = dense<1> : tensor<2xi32>
// CHECK-LABEL: @rar
-func.func @rar() -> (tensor<2xi32>, tensor<2xi32>) {
+util.func public @rar() -> (tensor<2xi32>, tensor<2xi32>) {
// CHECK: %[[T:.+]] = util.global.load @varA {id = 0
%varA_0 = util.global.load @varA {id = 0} : tensor<2xi32>
%varA_1 = util.global.load @varA {id = 1} : tensor<2xi32>
- // CHECK-NEXT: return %[[T]], %[[T]]
- return %varA_0, %varA_1 : tensor<2xi32>, tensor<2xi32>
+ // CHECK-NEXT: util.return %[[T]], %[[T]]
+ util.return %varA_0, %varA_1 : tensor<2xi32>, tensor<2xi32>
}
// -----
@@ -107,11 +107,11 @@
// CHECK-LABEL: @waw
// CHECK-SAME: (%[[ARG0:.+]]: tensor<2xi32>, %[[ARG1:.+]]: tensor<2xi32>)
-func.func @waw(%varA_0: tensor<2xi32>, %varA_1: tensor<2xi32>) {
+util.func public @waw(%varA_0: tensor<2xi32>, %varA_1: tensor<2xi32>) {
util.global.store %varA_0, @varA : tensor<2xi32>
// CHECK-NEXT: util.global.store %[[ARG1]], @varA
util.global.store %varA_1, @varA : tensor<2xi32>
- return
+ util.return
}
// -----
@@ -119,21 +119,21 @@
util.global private mutable @varA = dense<1> : tensor<2xi32>
// CHECK-LABEL: @side_effects(
-func.func @side_effects() {
+util.func public @side_effects() {
// CHECK-NEXT: %[[T0:.+]] = util.global.load @varA
%varA_0 = util.global.load @varA : tensor<2xi32>
// CHECK-NEXT: util.global.store %[[T0]], @varA
util.global.store %varA_0, @varA : tensor<2xi32>
- // CHECK-NEXT: call @other_fn()
- call @other_fn() : () -> ()
+ // CHECK-NEXT: util.call @other_fn()
+ util.call @other_fn() : () -> ()
// CHECK-NEXT: %[[T1:.+]] = util.global.load @varA
%varA_1 = util.global.load @varA : tensor<2xi32>
// CHECK-NEXT: util.global.store %[[T1]], @varA
util.global.store %varA_1, @varA : tensor<2xi32>
- return
+ util.return
}
-func.func private @other_fn()
+util.func private @other_fn()
// -----
@@ -141,7 +141,7 @@
util.global private mutable @varB = dense<2> : tensor<2xi32>
// CHECK-LABEL: @ordering
-func.func @ordering() {
+util.func public @ordering() {
%cst_top = arith.constant 1 : index
%varA_0 = util.global.load @varA {id = 0} : tensor<2xi32>
util.global.store %varA_0, @varA {id = 0} : tensor<2xi32>
@@ -160,5 +160,5 @@
// CHECK-NEXT: arith.constant
// CHECK-DAG: util.global.store %[[T0]], @varA {id = 0
// CHECK-DAG: util.global.store %[[T1]], @varB {id = 1
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/strip_debug_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/strip_debug_ops.mlir
index fcf7d37..52894e3 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/strip_debug_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/strip_debug_ops.mlir
@@ -1,8 +1,8 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(func.func(iree-util-strip-debug-ops))' %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-util-strip-debug-ops))' %s | FileCheck %s
// CHECK-LABEL: @stripAssert
-func.func @stripAssert(%cond: i1) {
+util.func @stripAssert(%cond: i1) {
// CHECK-NOT: cf.assert
cf.assert %cond, "hello!"
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis.mlir
index d5aa9f5..238567c 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis.mlir
@@ -1,67 +1,67 @@
// RUN: iree-opt --split-input-file --iree-util-test-float-range-analysis --allow-unregistered-dialect %s | FileCheck %s
// CHECK-LABEL: @scalar_const_trunc
-func.func @scalar_const_trunc() -> f32 {
+util.func @scalar_const_trunc() -> f32 {
%0 = arith.constant 5.0 : f32
// CHECK: fp-range: [5.000000, 5.000000, TRUNC]
%1 = "iree_unregistered.test_fprange"(%0) : (f32) -> f32
- return %1 : f32
+ util.return %1 : f32
}
// -----
// CHECK-LABEL: @scalar_const_non_trunc
-func.func @scalar_const_non_trunc() -> f32 {
+util.func @scalar_const_non_trunc() -> f32 {
%0 = arith.constant 5.2 : f32
// CHECK: fp-range: [5.200000, 5.200000, !trunc]
%1 = "iree_unregistered.test_fprange"(%0) : (f32) -> f32
- return %1 : f32
+ util.return %1 : f32
}
// -----
// CHECK-LABEL: @scalar_non_float
-func.func @scalar_non_float() -> i32 {
+util.func @scalar_non_float() -> i32 {
%0 = arith.constant 5 : i32
// NOTE: The least-constrained value is returned for a non-fp type. It
// is up to the user to ensure that we are requesting stats for fp types
// and this represents the soft-failure mode if asking about an illegal type.
// CHECK: fp-range: [-inf, inf, !trunc]
%1 = "iree_unregistered.test_fprange"(%0) : (i32) -> i32
- return %1 : i32
+ util.return %1 : i32
}
// -----
// CHECK-LABEL: @tensor_const_trunc
-func.func @tensor_const_trunc() -> tensor<2xf32> {
+util.func @tensor_const_trunc() -> tensor<2xf32> {
%0 = arith.constant dense<[-2.0, 2.0]> : tensor<2xf32>
// CHECK: fp-range: [-2.000000, 2.000000, TRUNC]
%1 = "iree_unregistered.test_fprange"(%0) : (tensor<2xf32>) -> tensor<2xf32>
- return %1 : tensor<2xf32>
+ util.return %1 : tensor<2xf32>
}
// -----
// CHECK-LABEL: @tensor_const_non_trunc
-func.func @tensor_const_non_trunc() -> tensor<2xf32> {
+util.func @tensor_const_non_trunc() -> tensor<2xf32> {
%0 = arith.constant dense<[-1.2, 2.0]> : tensor<2xf32>
// CHECK: fp-range: [-1.200000, 2.000000, !trunc]
%1 = "iree_unregistered.test_fprange"(%0) : (tensor<2xf32>) -> tensor<2xf32>
- return %1 : tensor<2xf32>
+ util.return %1 : tensor<2xf32>
}
// -----
// CHECK-LABEL: @min_max_no_trunc
-func.func @min_max_no_trunc(%arg0 : f32) -> f32 {
+util.func @min_max_no_trunc(%arg0 : f32) -> f32 {
%0 = arith.constant -5.0 : f32
%1 = arith.constant 5.0 : f32
%2 = arith.minimumf %arg0, %1 : f32
%3 = arith.maximumf %2, %0 : f32
// CHECK: fp-range: [-5.000000, 5.000000, !trunc]
%result = "iree_unregistered.test_fprange"(%3) : (f32) -> f32
- return %result : f32
+ util.return %result : f32
}
// -----
// CHECK-LABEL: @min_max_floor
-func.func @min_max_floor(%arg0 : f32) -> f32 {
+util.func @min_max_floor(%arg0 : f32) -> f32 {
%0 = arith.constant -5.0 : f32
%1 = arith.constant 5.0 : f32
%2 = arith.minimumf %arg0, %1 : f32
@@ -69,12 +69,12 @@
%4 = math.floor %3 : f32
// CHECK: fp-range: [-5.000000, 5.000000, TRUNC]
%result = "iree_unregistered.test_fprange"(%4) : (f32) -> f32
- return %result : f32
+ util.return %result : f32
}
// -----
// CHECK-LABEL: @min_max_floor_adj_range
-func.func @min_max_floor_adj_range(%arg0 : f32) -> f32 {
+util.func @min_max_floor_adj_range(%arg0 : f32) -> f32 {
%0 = arith.constant -5.2 : f32
%1 = arith.constant 5.2 : f32
%2 = arith.minimumf %arg0, %1 : f32
@@ -82,12 +82,12 @@
%4 = math.floor %3 : f32
// CHECK: fp-range: [-6.000000, 5.000000, TRUNC]
%result = "iree_unregistered.test_fprange"(%4) : (f32) -> f32
- return %result : f32
+ util.return %result : f32
}
// -----
// CHECK-LABEL: @floor_min_max
-func.func @floor_min_max(%arg0 : f32) -> f32 {
+util.func @floor_min_max(%arg0 : f32) -> f32 {
%0 = arith.constant -5.0 : f32
%1 = arith.constant 5.0 : f32
%2 = math.floor %arg0 : f32
@@ -95,5 +95,5 @@
%4 = arith.minimumf %3, %1 : f32
// CHECK: fp-range: [-5.000000, 5.000000, TRUNC]
%result = "iree_unregistered.test_fprange"(%4) : (f32) -> f32
- return %result : f32
+ util.return %result : f32
}
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis_linalg.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis_linalg.mlir
index e1a4003..a6e88e8 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis_linalg.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/test_float_range_analysis_linalg.mlir
@@ -2,7 +2,7 @@
#map0 = affine_map<(d0, d1) -> ()>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
-func.func @linalg_generic_traversal(%arg0 : tensor<5x6xf32>) -> (tensor<5x6xf32>, tensor<5x6xf32>, tensor<5x6xf32>) {
+util.func @linalg_generic_traversal(%arg0 : tensor<5x6xf32>) -> (tensor<5x6xf32>, tensor<5x6xf32>, tensor<5x6xf32>) {
%cst_min = arith.constant dense<-1.270000e+02> : tensor<f32>
%cst_max = arith.constant dense<1.270000e+02> : tensor<f32>
%init = tensor.empty() : tensor<5x6xf32>
@@ -38,5 +38,5 @@
%max_range = "iree_unregistered.test_fprange"(%max) : (tensor<5x6xf32>) -> tensor<5x6xf32>
// CHECK: fp-range: [-inf, inf, TRUNC]
%floor_range = "iree_unregistered.test_fprange"(%floor) : (tensor<5x6xf32>) -> tensor<5x6xf32>
- return %result_range, %max_range, %floor_range : tensor<5x6xf32>, tensor<5x6xf32>, tensor<5x6xf32>
+ util.return %result_range, %max_range, %floor_range : tensor<5x6xf32>, tensor<5x6xf32>, tensor<5x6xf32>
}
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
index 8345e92..9321d82 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/StandardToVM/ConvertStandardToVM.cpp
@@ -173,6 +173,7 @@
constexpr const char *kRetainedAttributes[] = {
"nosideeffects",
"vm.fallback",
+ "vm.signature",
};
auto retainedAttributes = ArrayRef<const char *>(
kRetainedAttributes,
@@ -283,7 +284,7 @@
// (Slow) lookup of the target function, which may be an import that we need
// to perform type conversion for.
auto calleeOp = SymbolTable::lookupSymbolIn(rootOp, calleeName);
- if (auto funcOp = dyn_cast_or_null<func::FuncOp>(calleeOp)) {
+ if (auto funcOp = dyn_cast_or_null<FunctionOpInterface>(calleeOp)) {
if (funcOp.isExternal()) {
// Import that may require conversion.
// This case handles when funcs are declared after the call.
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStructuralOps.cpp b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStructuralOps.cpp
index 5c0614e..87f0b56 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStructuralOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/UtilToVM/ConvertStructuralOps.cpp
@@ -146,6 +146,7 @@
constexpr const char *kRetainedAttributes[] = {
"nosideeffects",
"vm.fallback",
+ "vm.signature",
};
auto retainedAttributes = ArrayRef<const char *>(
kRetainedAttributes,
diff --git a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
index efcb00b..2eebddd 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
+++ b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
@@ -99,7 +99,6 @@
"@llvm-project//mlir:ArithUtils",
"@llvm-project//mlir:ControlFlowDialect",
"@llvm-project//mlir:DialectUtils",
- "@llvm-project//mlir:FuncDialect",
"@llvm-project//mlir:FunctionInterfaces",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgDialect",
diff --git a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
index 11c4af8..63e91a3 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
+++ b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
@@ -73,7 +73,6 @@
MLIRArithDialect
MLIRArithUtils
MLIRControlFlowDialect
- MLIRFuncDialect
MLIRFunctionInterfaces
MLIRIR
MLIRLinalgDialect
diff --git a/compiler/src/iree/compiler/GlobalOptimization/ExpandTensorShapes.cpp b/compiler/src/iree/compiler/GlobalOptimization/ExpandTensorShapes.cpp
index 1549a37..d652ace 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/ExpandTensorShapes.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/ExpandTensorShapes.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
@@ -59,7 +58,8 @@
// dynamic dims as globals as duplicates will get added and we'll need to rely
// on global fusion to get rid of them. Note that this only expands globals and
// does not yet update use sites - we just need the ops to reference.
-static ExpandedGlobalMap expandGlobalTensorDims(Operation *rootOp) {
+static ExpandedGlobalMap expandGlobalTensorDims(Operation *rootOp,
+ SymbolTable &symbolTable) {
ExpandedGlobalMap expandedGlobals;
// Gather all of the dynamically-shaped tensor globals in the root.
@@ -72,7 +72,6 @@
}
// Expand each global by adding one global per dynamic dim beside it.
- SymbolTable symbolTable(rootOp);
auto indexType = IndexType::get(rootOp->getContext());
for (auto &it : expandedGlobals) {
auto &global = it.second;
@@ -108,19 +107,23 @@
llvm::any_of(op->getResultTypes(), isDynamicTensor);
}
+static void expandType(Type type, SmallVectorImpl<Type> &newTypes) {
+ newTypes.push_back(type);
+ if (auto tensorType = llvm::dyn_cast<RankedTensorType>(type)) {
+ newTypes.append(tensorType.getNumDynamicDims(),
+ IndexType::get(type.getContext()));
+ }
+}
+
// Expands tensors in the given |types| list to (tensor, dynamic dims...).
// This could be changed to some iterator magic to avoid the alloc.
static SmallVector<Type> expandTypes(TypeRange types) {
if (types.empty())
return {};
- auto indexType = IndexType::get(types.front().getContext());
SmallVector<Type> newTypes;
newTypes.reserve(types.size() * 2);
for (auto type : types) {
- newTypes.push_back(type);
- if (auto tensorType = llvm::dyn_cast<RankedTensorType>(type)) {
- newTypes.append(tensorType.getNumDynamicDims(), indexType);
- }
+ expandType(type, newTypes);
}
return newTypes;
}
@@ -163,6 +166,20 @@
return expandedValue;
}
+static void expandOperand(Location loc, Value operand,
+ SmallVectorImpl<Value> &newOperands,
+ TensorDimMap &tensorDimMap, IndexSet &indexSet,
+ OpBuilder &builder) {
+ if (isDynamicTensor(operand.getType())) {
+ auto expandedValue =
+ consumeExpandedValue(loc, operand, tensorDimMap, indexSet, builder);
+ newOperands.push_back(expandedValue.tensor);
+ newOperands.append(expandedValue.dynamicDims);
+ } else {
+ newOperands.push_back(operand);
+ }
+}
+
// Expands tensor in |operands| into (tensor, dynamic dims...) tuples.
static SmallVector<Value> expandOperands(Location loc, ValueRange operands,
TensorDimMap &tensorDimMap,
@@ -171,25 +188,20 @@
SmallVector<Value> result;
result.reserve(operands.size() * 2);
for (auto operand : operands) {
- if (isDynamicTensor(operand.getType())) {
- auto expandedValue =
- consumeExpandedValue(loc, operand, tensorDimMap, indexSet, builder);
- result.push_back(expandedValue.tensor);
- result.append(expandedValue.dynamicDims);
- } else {
- result.push_back(operand);
- }
+ expandOperand(loc, operand, result, tensorDimMap, indexSet, builder);
}
return result;
}
-static void expandTensorDims(Operation *op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap &tensorDimMap);
+static void expandTensorDims(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap &tensorDimMap);
// Recursively expands tensors into (tensor, dynamic dims...) tuples within the
// given |region|. All branches, ops, and nested regions will be processed.
-static void expandRegion(Region ®ion, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap tensorDimMap) {
+static void expandRegion(Region ®ion, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap tensorDimMap) {
if (region.empty())
return;
@@ -232,14 +244,14 @@
if (region.hasOneBlock()) {
for (auto &op :
llvm::make_early_inc_range(region.front().getOperations())) {
- expandTensorDims(&op, globalMap, indexSet, tensorDimMap);
+ expandTensorDims(&op, symbolTable, globalMap, indexSet, tensorDimMap);
}
} else {
DominanceInfo domInfo(region.getParentOp());
for (auto *blockInfo : llvm::breadth_first(domInfo.getRootNode(®ion))) {
auto *block = blockInfo->getBlock();
for (auto &op : llvm::make_early_inc_range(block->getOperations())) {
- expandTensorDims(&op, globalMap, indexSet, tensorDimMap);
+ expandTensorDims(&op, symbolTable, globalMap, indexSet, tensorDimMap);
}
}
}
@@ -337,30 +349,36 @@
}
static void expandInitializerOp(IREE::Util::InitializerOp op,
+ SymbolTable &symbolTable,
ExpandedGlobalMap &globalMap,
IndexSet &indexSet,
TensorDimMap &tensorDimMap) {
- expandRegion(op.getRegion(), globalMap, indexSet, tensorDimMap);
+ expandRegion(op.getRegion(), symbolTable, globalMap, indexSet, tensorDimMap);
}
// Inserts dimension associate reshapes on tensor arguments.
// Requires that the ExpandCallOp/ExpandReturnOp patterns handle passing dims.
//
// Example:
-// func.func @foo(%0: tensor<?xf32>)
+// util.func @foo(%0: tensor<?xf32>)
// ->
-// func.func @foo(%0: tensor<?xf32>, %d: index) {
+// util.func @foo(%0: tensor<?xf32>, %d: index) {
// %1 = flow.tensor.tie_shape %0 : tensor<?xf32>{%d}
-static void expandFuncOp(mlir::func::FuncOp op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap &tensorDimMap) {
- auto oldType = op.getFunctionType();
- auto inputTypes = expandTypes(oldType.getInputs());
- auto resultTypes = expandTypes(oldType.getResults());
- auto newType = FunctionType::get(op.getContext(), inputTypes, resultTypes);
- if (newType != oldType) {
- op.setType(newType);
+static void expandFuncOp(IREE::Util::FuncOp op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap &tensorDimMap) {
+ // Ignore public/external function signatures but still convert regions.
+ bool canModifyEntryBlock = !IREE::Util::isPublicOrExternal(op);
+ if (canModifyEntryBlock) {
+ op.expandSignature(
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ });
}
- expandRegion(op.getRegion(), globalMap, indexSet, tensorDimMap);
+ expandRegion(op.getRegion(), symbolTable, globalMap, indexSet, tensorDimMap);
}
// Splits tensor operands and results into (tensor, dynamic dims...).
@@ -368,22 +386,31 @@
//
// Example:
// %a = flow.tensor.tie_shape %0 : tensor<?xf32>{%d}
-// %r = call @foo(%a)
+// %r = util.call @foo(%a)
// ->
-// %r, %rd = call @foo(%a, %ad)
+// %r, %rd = util.call @foo(%a, %ad)
// %2 = flow.tensor.tie_shape %r : tensor<?xf32>{%rd}
-static void expandCallOp(mlir::func::CallOp op, IndexSet &indexSet,
- TensorDimMap &tensorDimMap) {
+static void expandCallOp(IREE::Util::CallOp op, SymbolTable &symbolTable,
+ IndexSet &indexSet, TensorDimMap &tensorDimMap) {
if (!usesDynamicTensors(op))
return;
+ // Ignore calls to public/external functions.
+ auto calleeOp = symbolTable.lookup<CallableOpInterface>(op.getCallee());
+ if (IREE::Util::isPublicOrExternal(calleeOp))
+ return;
+
// Build the new call op with expanded operands and results.
OpBuilder builder(op);
- auto operands = expandOperands(op.getLoc(), op.getOperands(), tensorDimMap,
- indexSet, builder);
- auto resultTypes = expandTypes(op.getResultTypes());
- auto newOp = builder.create<mlir::func::CallOp>(op.getLoc(), op.getCallee(),
- resultTypes, operands);
+ auto newOp = op.cloneAndExpand(
+ [&](unsigned i, Value operand, SmallVectorImpl<Value> &newOperands) {
+ expandOperand(op.getLoc(), operand, newOperands, tensorDimMap, indexSet,
+ builder);
+ },
+ [&](unsigned i, Type type, SmallVectorImpl<Type> &newTypes) {
+ expandType(type, newTypes);
+ },
+ builder);
retieResults(op, newOp, tensorDimMap);
op.erase();
@@ -394,17 +421,19 @@
//
// Example:
// %1 = flow.tensor.tie_shape %0 : tensor<?xf32>{%d}
-// return %1
+// util.return %1
// ->
-// return %0, %d
-static void expandReturnOp(mlir::func::ReturnOp op, IndexSet &indexSet,
+// util.return %0, %d
+static void expandReturnOp(IREE::Util::ReturnOp op, IndexSet &indexSet,
TensorDimMap &tensorDimMap) {
if (!usesDynamicTensors(op))
return;
+ if (IREE::Util::isPublicOrExternal(op->getParentOfType<IREE::Util::FuncOp>()))
+ return;
OpBuilder builder(op);
auto operands = expandOperands(op.getLoc(), op.getOperands(), tensorDimMap,
indexSet, builder);
- builder.create<mlir::func::ReturnOp>(op.getLoc(), operands);
+ builder.create<IREE::Util::ReturnOp>(op.getLoc(), operands);
op.erase();
}
@@ -484,8 +513,9 @@
op.erase();
}
-static void expandWhileOp(mlir::scf::WhileOp op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap &tensorDimMap) {
+static void expandWhileOp(mlir::scf::WhileOp op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap &tensorDimMap) {
OpBuilder builder(op);
auto operands = expandOperands(op.getLoc(), op.getOperands(), tensorDimMap,
indexSet, builder);
@@ -498,14 +528,17 @@
newOp.getBefore().takeBody(op.getBefore());
newOp.getAfter().takeBody(op.getAfter());
- expandRegion(newOp.getBefore(), globalMap, indexSet, tensorDimMap);
- expandRegion(newOp.getAfter(), globalMap, indexSet, tensorDimMap);
+ expandRegion(newOp.getBefore(), symbolTable, globalMap, indexSet,
+ tensorDimMap);
+ expandRegion(newOp.getAfter(), symbolTable, globalMap, indexSet,
+ tensorDimMap);
retieResults(op, newOp, tensorDimMap);
op.erase();
}
-static void expandIfOp(mlir::scf::IfOp op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap &tensorDimMap) {
+static void expandIfOp(mlir::scf::IfOp op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap &tensorDimMap) {
OpBuilder builder(op);
auto resultTypes = expandTypes(op.getResultTypes());
@@ -513,11 +546,12 @@
op.getLoc(), resultTypes, op.getOperand(), op.elseBlock() != nullptr);
newOp.getBodyRegion().takeBody(op.getBodyRegion());
- expandRegion(newOp.getBodyRegion(), globalMap, indexSet, tensorDimMap);
-
+ expandRegion(newOp.getBodyRegion(), symbolTable, globalMap, indexSet,
+ tensorDimMap);
if (newOp.elseBlock()) {
newOp.getElseRegion().takeBody(op.getElseRegion());
- expandRegion(newOp.getElseRegion(), globalMap, indexSet, tensorDimMap);
+ expandRegion(newOp.getElseRegion(), symbolTable, globalMap, indexSet,
+ tensorDimMap);
}
retieResults(op, newOp, tensorDimMap);
@@ -544,19 +578,21 @@
}
// Recursively expands tensors into (tensor, dynamic dims...) in |op|.
-static void expandTensorDims(Operation *op, ExpandedGlobalMap &globalMap,
- IndexSet &indexSet, TensorDimMap &tensorDimMap) {
+static void expandTensorDims(Operation *op, SymbolTable &symbolTable,
+ ExpandedGlobalMap &globalMap, IndexSet &indexSet,
+ TensorDimMap &tensorDimMap) {
if (auto loadOp = dyn_cast<IREE::Util::GlobalLoadOpInterface>(op)) {
expandGlobalLoadOp(loadOp, globalMap, indexSet, tensorDimMap);
} else if (auto storeOp = dyn_cast<IREE::Util::GlobalStoreOpInterface>(op)) {
expandGlobalStoreOp(storeOp, globalMap, indexSet, tensorDimMap);
} else if (auto initializerOp = dyn_cast<IREE::Util::InitializerOp>(op)) {
- expandInitializerOp(initializerOp, globalMap, indexSet, tensorDimMap);
- } else if (auto funcOp = dyn_cast<mlir::func::FuncOp>(op)) {
- expandFuncOp(funcOp, globalMap, indexSet, tensorDimMap);
- } else if (auto callOp = dyn_cast<mlir::func::CallOp>(op)) {
- expandCallOp(callOp, indexSet, tensorDimMap);
- } else if (auto returnOp = dyn_cast<mlir::func::ReturnOp>(op)) {
+ expandInitializerOp(initializerOp, symbolTable, globalMap, indexSet,
+ tensorDimMap);
+ } else if (auto funcOp = dyn_cast<IREE::Util::FuncOp>(op)) {
+ expandFuncOp(funcOp, symbolTable, globalMap, indexSet, tensorDimMap);
+ } else if (auto callOp = dyn_cast<IREE::Util::CallOp>(op)) {
+ expandCallOp(callOp, symbolTable, indexSet, tensorDimMap);
+ } else if (auto returnOp = dyn_cast<IREE::Util::ReturnOp>(op)) {
expandReturnOp(returnOp, indexSet, tensorDimMap);
} else if (auto branchOp = dyn_cast<mlir::cf::BranchOp>(op)) {
expandBranchOp(branchOp, indexSet, tensorDimMap);
@@ -565,9 +601,9 @@
} else if (auto selectOp = dyn_cast<mlir::arith::SelectOp>(op)) {
expandSelectOp(selectOp, indexSet, tensorDimMap);
} else if (auto whileOp = dyn_cast<mlir::scf::WhileOp>(op)) {
- expandWhileOp(whileOp, globalMap, indexSet, tensorDimMap);
+ expandWhileOp(whileOp, symbolTable, globalMap, indexSet, tensorDimMap);
} else if (auto ifOp = dyn_cast<mlir::scf::IfOp>(op)) {
- expandIfOp(ifOp, globalMap, indexSet, tensorDimMap);
+ expandIfOp(ifOp, symbolTable, globalMap, indexSet, tensorDimMap);
} else if (auto yieldOp = dyn_cast<mlir::scf::YieldOp>(op)) {
expandScfYieldOp(yieldOp, indexSet, tensorDimMap);
} else if (auto conditionOp = dyn_cast<mlir::scf::ConditionOp>(op)) {
@@ -593,7 +629,6 @@
ExpandTensorShapesPass() = default;
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<mlir::func::FuncDialect>();
registry.insert<mlir::arith::ArithDialect>();
registry.insert<IREE::Flow::FlowDialect>();
registry.insert<IREE::Util::UtilDialect>();
@@ -601,9 +636,10 @@
void runOnOperation() override {
auto rootOp = getOperation();
+ SymbolTable symbolTable(rootOp);
// Expand all util.global ops holding tensor into tensor + dynamic dims.
- auto globalMap = expandGlobalTensorDims(rootOp);
+ auto globalMap = expandGlobalTensorDims(rootOp, symbolTable);
// Walk the entire IR tree and expand the globals.
// We could do this via pattern application but that gets much trickier to
@@ -618,7 +654,8 @@
? OpBuilder(callableOp)
: OpBuilder::atBlockBegin(®ion->front()));
TensorDimMap tensorDimMap;
- expandTensorDims(callableOp, globalMap, indexSet, tensorDimMap);
+ expandTensorDims(callableOp, symbolTable, globalMap, indexSet,
+ tensorDimMap);
}
}
};
diff --git a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
index 63c1a3a..bf62869 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
@@ -13,7 +13,6 @@
#include "iree/compiler/Utils/PassUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
@@ -24,7 +23,7 @@
namespace mlir::iree_compiler::GlobalOptimization {
using FunctionLikeNest =
- MultiOpNest<func::FuncOp, IREE::Util::InitializerOp, IREE::Util::FuncOp>;
+ MultiOpNest<IREE::Util::InitializerOp, IREE::Util::FuncOp>;
class MaterializeHomogeneousEncodingsPass
: public MaterializeHomogeneousEncodingsBase<
diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
index e53f7cd..ccf47c5 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
@@ -9,7 +9,6 @@
#include "iree/compiler/Dialect/Util/Transforms/Passes.h"
#include "iree/compiler/Modules/IO/Parameters/Transforms/Passes.h"
#include "iree/compiler/Utils/PassUtils.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Transforms/Passes.h"
@@ -17,7 +16,7 @@
namespace mlir::iree_compiler::GlobalOptimization {
using FunctionLikeNest =
- MultiOpNest<func::FuncOp, IREE::Util::InitializerOp, IREE::Util::FuncOp>;
+ MultiOpNest<IREE::Util::InitializerOp, IREE::Util::FuncOp>;
static llvm::cl::opt<bool> clEnableQuantizedMatmulReassociation(
"iree-global-opt-enable-quantized-matmul-reassociation",
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/cleanup_numeric_narrowing.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/cleanup_numeric_narrowing.mlir
index 27a59fb..963202b 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/cleanup_numeric_narrowing.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/cleanup_numeric_narrowing.mlir
@@ -1,8 +1,8 @@
// RUN: iree-opt --iree-global-opt-cleanup-numeric-narrowing %s | FileCheck %s
// CHECK-LABEL: @remove_inferences
-func.func @remove_inferences(%arg0 : tensor<5x3xf32>) -> tensor<5x3xf32> {
+util.func public @remove_inferences(%arg0 : tensor<5x3xf32>) -> tensor<5x3xf32> {
%0 = util.numeric.optional_narrow %arg0 : tensor<5x3xf32> as ui3 {max_value = 5 : ui3, min_value = 5 : ui3}
- // CHECK: return %arg0
- return %0 : tensor<5x3xf32>
+ // CHECK: util.return %arg0
+ util.return %0 : tensor<5x3xf32>
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/conv1x1_to_matmul.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/conv1x1_to_matmul.mlir
index 18df3dd..ad1cdbd 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/conv1x1_to_matmul.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/conv1x1_to_matmul.mlir
@@ -1,12 +1,12 @@
// RUN: iree-opt --split-input-file -iree-global-opt-convert-1x1-filter-conv2d-to-matmul %s | FileCheck %s
-func.func @nhwc_conv_2d(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
+util.func public @nhwc_conv_2d(%input: tensor<1x4x5x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x5x7xf32> {
%0 = tensor.empty() : tensor<1x4x5x7xf32>
%1 = linalg.conv_2d_nhwc_hwcf {
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x4x5x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x5x7xf32>) -> tensor<1x4x5x7xf32>
- return %1 : tensor<1x4x5x7xf32>
+ util.return %1 : tensor<1x4x5x7xf32>
}
// CHECK: @nhwc_conv_2d
@@ -18,12 +18,12 @@
// CHECK: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0, 1, 2], [3]] : tensor<1x4x5x7xf32> into tensor<20x7xf32>
// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INPUT]], %[[RESHAPED_FILTER]] : tensor<20x2xf32>, tensor<2x7xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<20x7xf32>)
// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1, 2], [3]] : tensor<20x7xf32> into tensor<1x4x5x7xf32>
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
// CHECK: @dynamic_nhwc_conv_2d
-func.func @dynamic_nhwc_conv_2d(%input: tensor<1x4x?x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x?x7xf32> {
+util.func public @dynamic_nhwc_conv_2d(%input: tensor<1x4x?x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x?x7xf32> {
%c2 = arith.constant 2 : index
%d2 = tensor.dim %input, %c2 : tensor<1x4x?x2xf32>
%0 = tensor.empty(%d2) : tensor<1x4x?x7xf32>
@@ -31,7 +31,7 @@
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x4x?x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x4x?x7xf32>) -> tensor<1x4x?x7xf32>
- return %1 : tensor<1x4x?x7xf32>
+ util.return %1 : tensor<1x4x?x7xf32>
}
// CHECK: %[[INPUT:.+]]: tensor<1x4x?x2xf32>
@@ -47,7 +47,7 @@
// -----
-func.func @fail_dynamic_nhwc_conv_2d(%input: tensor<1x?x?x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x?x?x7xf32> {
+util.func public @fail_dynamic_nhwc_conv_2d(%input: tensor<1x?x?x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x?x?x7xf32> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%d1 = tensor.dim %input, %c1 : tensor<1x?x?x2xf32>
@@ -57,7 +57,7 @@
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x?x?x2xf32>, tensor<1x1x2x7xf32>) outs(%0 : tensor<1x?x?x7xf32>) -> tensor<1x?x?x7xf32>
- return %1 : tensor<1x?x?x7xf32>
+ util.return %1 : tensor<1x?x?x7xf32>
}
// CHECK: @fail_dynamic_nhwc_conv_2d
@@ -65,13 +65,13 @@
// -----
-func.func @nchw_conv_2d(%input: tensor<1x2x4x5xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x4x5xf32> {
+util.func public @nchw_conv_2d(%input: tensor<1x2x4x5xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x4x5xf32> {
%0 = tensor.empty() : tensor<1x7x4x5xf32>
%1 = linalg.conv_2d_nchw_fchw {
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x2x4x5xf32>, tensor<7x2x1x1xf32>) outs(%0 : tensor<1x7x4x5xf32>) -> tensor<1x7x4x5xf32>
- return %1 : tensor<1x7x4x5xf32>
+ util.return %1 : tensor<1x7x4x5xf32>
}
// CHECK: @nchw_conv_2d
// CHECK: %[[INPUT:.+]]: tensor<1x2x4x5xf32>
@@ -82,11 +82,11 @@
// CHECK: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0, 1], [2, 3]] : tensor<1x7x4x5xf32> into tensor<7x20xf32>
// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_FILTER]], %[[RESHAPED_INPUT]] : tensor<7x2xf32>, tensor<2x20xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<7x20xf32>)
// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1], [2, 3]] : tensor<7x20xf32> into tensor<1x7x4x5xf32>
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @dynamic_nchw_conv_2d(%input: tensor<1x2x4x?xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x4x?xf32> {
+util.func public @dynamic_nchw_conv_2d(%input: tensor<1x2x4x?xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x4x?xf32> {
%c3 = arith.constant 3 : index
%d3 = tensor.dim %input, %c3 : tensor<1x2x4x?xf32>
%0 = tensor.empty(%d3) : tensor<1x7x4x?xf32>
@@ -94,7 +94,7 @@
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x2x4x?xf32>, tensor<7x2x1x1xf32>) outs(%0 : tensor<1x7x4x?xf32>) -> tensor<1x7x4x?xf32>
- return %1 : tensor<1x7x4x?xf32>
+ util.return %1 : tensor<1x7x4x?xf32>
}
// CHECK: @dynamic_nchw_conv_2d
@@ -108,11 +108,11 @@
// CHECK: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0, 1], [2, 3]] : tensor<1x7x4x?xf32> into tensor<7x?xf32>
// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_FILTER]], %[[RESHAPED_INPUT]] : tensor<7x2xf32>, tensor<2x?xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<7x?xf32>)
// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1], [2, 3]] : tensor<7x?xf32> into tensor<1x7x4x?xf32>
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @fail_dynamic_nchw_conv_2d(%input: tensor<1x2x?x?xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x?x?xf32> {
+util.func public @fail_dynamic_nchw_conv_2d(%input: tensor<1x2x?x?xf32>, %filter: tensor<7x2x1x1xf32>) -> tensor<1x7x?x?xf32> {
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%d2 = tensor.dim %input, %c2 : tensor<1x2x?x?xf32>
@@ -122,7 +122,7 @@
dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>
} ins(%input, %filter : tensor<1x2x?x?xf32>, tensor<7x2x1x1xf32>) outs(%0 : tensor<1x7x?x?xf32>) -> tensor<1x7x?x?xf32>
- return %1 : tensor<1x7x?x?xf32>
+ util.return %1 : tensor<1x7x?x?xf32>
}
// CHECK: @fail_dynamic_nchw_conv_2d
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/demote_contraction_inputs_to_bf16.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/demote_contraction_inputs_to_bf16.mlir
index 66b9067..ff717b8 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/demote_contraction_inputs_to_bf16.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/demote_contraction_inputs_to_bf16.mlir
@@ -1,10 +1,10 @@
// RUN: iree-opt --split-input-file -iree-global-opt-demote-contraction-inputs-to-bf16 %s | FileCheck %s
-func.func @matmul_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @matmul_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK: @matmul_f32f32f32
@@ -23,11 +23,11 @@
// -----
-func.func @dynamic_matmul_f32f32f32(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @dynamic_matmul_f32f32f32(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
// CHECK: @dynamic_matmul_f32f32f32
@@ -44,11 +44,11 @@
// -----
-func.func @batch_matmul_f32f32f32(%arg0 : tensor<4x100x250xf32>, %arg1 : tensor<4x250x500xf32>,
+util.func public @batch_matmul_f32f32f32(%arg0 : tensor<4x100x250xf32>, %arg1 : tensor<4x250x500xf32>,
%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<4x100x250xf32>, tensor<4x250x500xf32>)
outs(%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32>
- return %0 : tensor<4x100x500xf32>
+ util.return %0 : tensor<4x100x500xf32>
}
// CHECK: @batch_matmul_f32f32f32
@@ -67,11 +67,11 @@
// -----
-func.func @matvec_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250xf32>,
+util.func public @matvec_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250xf32>,
%arg2 : tensor<100xf32>) -> tensor<100xf32> {
%0 = linalg.matvec ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<250xf32>)
outs(%arg2 : tensor<100xf32>) -> tensor<100xf32>
- return %0 : tensor<100xf32>
+ util.return %0 : tensor<100xf32>
}
// CHECK: @matvec_f32f32f32
@@ -90,11 +90,11 @@
// -----
-func.func @batch_vecmat_f32f32f32(%arg0 : tensor<4x250xf32>, %arg1 : tensor<4x250x500xf32>,
+util.func public @batch_vecmat_f32f32f32(%arg0 : tensor<4x250xf32>, %arg1 : tensor<4x250x500xf32>,
%arg2 : tensor<4x500xf32>) -> tensor<4x500xf32> {
%0 = linalg.batch_vecmat ins(%arg0, %arg1 : tensor<4x250xf32>, tensor<4x250x500xf32>)
outs(%arg2 : tensor<4x500xf32>) -> tensor<4x500xf32>
- return %0 : tensor<4x500xf32>
+ util.return %0 : tensor<4x500xf32>
}
// CHECK: @batch_vecmat_f32f32f32
@@ -113,11 +113,11 @@
// -----
-func.func @nonmatch_matmul_f32f32f64(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @nonmatch_matmul_f32f32f64(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<100x500xf64>) -> tensor<100x500xf64> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<100x500xf64>) -> tensor<100x500xf64>
- return %0 : tensor<100x500xf64>
+ util.return %0 : tensor<100x500xf64>
}
// CHECK: @nonmatch_matmul_f32f32f64
@@ -130,11 +130,11 @@
// -----
-func.func @batch_matmul_transpose_a_f32f32f32(%arg0 : tensor<4x250x100xf32>, %arg1 : tensor<4x250x500xf32>,
+util.func public @batch_matmul_transpose_a_f32f32f32(%arg0 : tensor<4x250x100xf32>, %arg1 : tensor<4x250x500xf32>,
%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32> {
%0 = linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : tensor<4x250x100xf32>, tensor<4x250x500xf32>)
outs(%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32>
- return %0 : tensor<4x100x500xf32>
+ util.return %0 : tensor<4x100x500xf32>
}
// CHECK: @batch_matmul_transpose_a_f32f32f32
@@ -153,11 +153,11 @@
// -----
-func.func @batch_matmul_transpose_b_f32f32f32(%arg0 : tensor<4x100x250xf32>, %arg1 : tensor<4x500x250xf32>,
+util.func public @batch_matmul_transpose_b_f32f32f32(%arg0 : tensor<4x100x250xf32>, %arg1 : tensor<4x500x250xf32>,
%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32> {
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : tensor<4x100x250xf32>, tensor<4x500x250xf32>)
outs(%arg2 : tensor<4x100x500xf32>) -> tensor<4x100x500xf32>
- return %0 : tensor<4x100x500xf32>
+ util.return %0 : tensor<4x100x500xf32>
}
// CHECK: @batch_matmul_transpose_b_f32f32f32
@@ -176,11 +176,11 @@
// -----
-func.func @matmul_transpose_a_f32f32f32(%arg0 : tensor<250x100xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @matmul_transpose_a_f32f32f32(%arg0 : tensor<250x100xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<250x100xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK: @matmul_transpose_a_f32f32f32
@@ -199,11 +199,11 @@
// -----
-func.func @matmul_transpose_b_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<500x250xf32>,
+util.func public @matmul_transpose_b_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<500x250xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<500x250xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK: @matmul_transpose_b_f32f32f32
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/detach_elementwise_from_named_ops.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/detach_elementwise_from_named_ops.mlir
index 162acf9..cec787f 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/detach_elementwise_from_named_ops.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/detach_elementwise_from_named_ops.mlir
@@ -1,6 +1,6 @@
// RUN: iree-opt --split-input-file --iree-global-opt-detach-elementwise-from-named-ops --mlir-print-local-scope %s | FileCheck %s
-func.func @matmul(%a: tensor<?x64xf32>, %b: tensor<64x?xf32>, %c: tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @matmul(%a: tensor<?x64xf32>, %b: tensor<64x?xf32>, %c: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
@@ -10,10 +10,10 @@
linalg.yield %1 : f32
} -> tensor<?x?xf32>
%1 = linalg.matmul ins(%a, %b : tensor<?x64xf32>, tensor<64x?xf32>) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @matmul
+// CHECK-LABEL: util.func public @matmul
// CHECK-SAME: (%[[A:.+]]: tensor<?x64xf32>, %[[B:.+]]: tensor<64x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -36,11 +36,11 @@
// CHECK: ^{{.+}}(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %{{.+}}: f32):
// CHECK: %[[ADD:.+]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
// CHECK: linalg.yield %[[ADD]] : f32
-// CHECK: return %[[EW]]
+// CHECK: util.return %[[EW]]
// -----
-func.func @batch_matmul(%a: tensor<?x8x?xi32>, %b: tensor<?x?x16xi32>, %c: tensor<?x8x16xi32>) -> tensor<?x8x16xi32> {
+util.func public @batch_matmul(%a: tensor<?x8x?xi32>, %b: tensor<?x?x16xi32>, %c: tensor<?x8x16xi32>) -> tensor<?x8x16xi32> {
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"]}
@@ -50,10 +50,10 @@
linalg.yield %1 : i32
} -> tensor<?x8x16xi32>
%1 = linalg.batch_matmul ins(%a, %b : tensor<?x8x?xi32>, tensor<?x?x16xi32>) outs(%0 : tensor<?x8x16xi32>) -> tensor<?x8x16xi32>
- return %1 : tensor<?x8x16xi32>
+ util.return %1 : tensor<?x8x16xi32>
}
-// CHECK-LABEL: func @batch_matmul
+// CHECK-LABEL: util.func public @batch_matmul
// CHECK-SAME: (%[[A:.+]]: tensor<?x8x?xi32>, %[[B:.+]]: tensor<?x?x16xi32>, %[[ARG2:.+]]: tensor<?x8x16xi32>)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -71,11 +71,11 @@
// CHECK-SAME: outs(%[[FILL]] : tensor<?x8x16xi32>)
// CHECK: %[[ADD:.+]] = arith.addi
// CHECK: linalg.yield %[[ADD]] : i32
-// CHECK: return %[[EW]]
+// CHECK: util.return %[[EW]]
// -----
-func.func @conv(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3x32xf32>, %init: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
+util.func public @conv(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3x32xf32>, %init: tensor<32xf32>) -> tensor<1x112x112x32xf32> {
%init0 = tensor.empty() : tensor<1x112x112x32xf32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
@@ -86,10 +86,10 @@
} -> tensor<1x112x112x32xf32>
%1 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) outs(%0 : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
- return %1 : tensor<1x112x112x32xf32>
+ util.return %1 : tensor<1x112x112x32xf32>
}
-// CHECK-LABEL: func @conv
+// CHECK-LABEL: util.func public @conv
// CHECK-SAME: (%{{.+}}: tensor<1x225x225x3xf32>, %{{.+}}: tensor<3x3x3x32xf32>, %[[BIAS:.+]]: tensor<32xf32>)
// CHECK: %[[INIT:.+]] = linalg.generic
// CHECK-SAME: ins(%[[BIAS]] :
@@ -101,7 +101,7 @@
// -----
-func.func @keep_fill(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @keep_fill(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
@@ -111,24 +111,24 @@
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%gemm = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %gemm : tensor<?x?xf32>
+ util.return %gemm : tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @keep_fill
+// CHECK-LABEL: util.func public @keep_fill
// CHECK-NOT: linalg.generic
// -----
-func.func @keep_arg(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @keep_arg(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: func.func @keep_arg
+// CHECK-LABEL: util.func public @keep_arg
// CHECK-NOT: linalg.generic
// -----
-func.func @fft_cst_output(%arg0 : tensor<3x2190x1x512xf32>)
+util.func public @fft_cst_output(%arg0 : tensor<3x2190x1x512xf32>)
-> (tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>) {
%c1 = arith.constant 1 : index
%cst = arith.constant dense<1.000000e+00> : tensor<1xf32>
@@ -138,9 +138,9 @@
ins(%c1, %cst, %cst_0 : index, tensor<1xf32>, tensor<1xf32>)
outs(%arg0, %cst_1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>)
: tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
- return %0#0, %0#1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
+ util.return %0#0, %0#1 : tensor<3x2190x1x512xf32>, tensor<3x2190x1x512xf32>
}
-// CHECK-LABEL: func @fft_cst_output(
+// CHECK-LABEL: util.func public @fft_cst_output(
// CHECK-SAME: %[[ARG0:.+]]: tensor<3x2190x1x512xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[INIT:.+]] = tensor.empty()
@@ -149,7 +149,7 @@
// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[FFT:.+]]:2 = iree_linalg_ext.fft
// CHECK-SAME: outs(%[[ARG0]], %[[FILL]] :
-// CHECK: return %[[FFT]]#0, %[[FFT]]#1
+// CHECK: util.return %[[FFT]]#0, %[[FFT]]#1
// -----
@@ -158,7 +158,7 @@
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0 * 2 + d3, d1 * 2 + d4, d2)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d3, d4)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
-func.func @generic_cst_output(%arg0 : tensor<114x114x64xf32>) -> tensor<56x56x64xf32> {
+util.func public @generic_cst_output(%arg0 : tensor<114x114x64xf32>) -> tensor<56x56x64xf32> {
%cst = arith.constant dense<0xFF800000> : tensor<56x56x64xf32>
%1 = tensor.empty() : tensor<3x3xf32>
%2 = linalg.generic {
@@ -168,9 +168,9 @@
%3 = arith.maximumf %out, %in : f32
linalg.yield %3 : f32
} -> tensor<56x56x64xf32>
- return %2 : tensor<56x56x64xf32>
+ util.return %2 : tensor<56x56x64xf32>
}
-// CHECK-LABEL: func @generic_cst_output
+// CHECK-LABEL: util.func public @generic_cst_output
// CHECK-SAME: %[[ARG0:.+]]: tensor<114x114x64xf32>
// CHECK: %[[CST:.+]] = arith.constant 0xFF800000 : f32
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<56x56x64xf32>
@@ -179,4 +179,4 @@
// CHECK-SAME: outs(%[[INIT]] :
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: outs(%[[FILL]] :
-// CHECK: return %[[GENERIC]]
+// CHECK: util.return %[[GENERIC]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/expand_tensor_shapes.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/expand_tensor_shapes.mlir
index 7f8c1c2..de519db 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/expand_tensor_shapes.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/expand_tensor_shapes.mlir
@@ -8,7 +8,7 @@
util.global private mutable @loadedGlobal : tensor<4x?x?x2xf32>
// CHECK-LABEL: @globalLoad
-func.func @globalLoad() {
+util.func private @globalLoad() {
// CHECK-NEXT: %[[TENSOR:.+]] = util.global.load @loadedGlobal : tensor<4x?x?x2xf32>
// CHECK-NEXT: %[[D1:.+]] = util.global.load @loadedGlobal__d1 : index
// CHECK-NEXT: %[[D2:.+]] = util.global.load @loadedGlobal__d2 : index
@@ -16,7 +16,7 @@
%0 = util.global.load @loadedGlobal : tensor<4x?x?x2xf32>
// CHECK-NEXT: util.optimization_barrier %[[TIED]]
util.optimization_barrier %0 : tensor<4x?x?x2xf32>
- return
+ util.return
}
// -----
@@ -30,13 +30,13 @@
// CHECK-LABEL: @globalStore
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[D1:.+]]: index, %[[D2:.+]]: index)
-func.func @globalStore(%arg0: tensor<4x?x?x2xf32>) {
+util.func private @globalStore(%arg0: tensor<4x?x?x2xf32>) {
// CHECK-NEXT: %[[TIED:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[D1]], %[[D2]]}
// CHECK-NEXT: util.global.store %[[ARG0]], @storedGlobal : tensor<4x?x?x2xf32>
// CHECK-NEXT: util.global.store %[[D1]], @storedGlobal__d1 : index
// CHECK-NEXT: util.global.store %[[D2]], @storedGlobal__d2 : index
util.global.store %arg0, @storedGlobal : tensor<4x?x?x2xf32>
- return
+ util.return
}
// -----
@@ -46,7 +46,7 @@
// CHECK-LABEL: @funcArgs
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xi32>, %[[ARG1_D0:.+]]: index)
-func.func @funcArgs(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
+util.func private @funcArgs(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
// CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
// CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
@@ -55,7 +55,7 @@
// CHECK-NEXT: util.optimization_barrier %[[TIED_ARG1]]
util.optimization_barrier %arg1 : tensor<?xi32>
- return
+ util.return
}
// -----
@@ -65,14 +65,14 @@
// CHECK-LABEL: @funcResults
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xi32>, %[[ARG1_D0:.+]]: index)
-func.func @funcResults(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>) {
+util.func private @funcResults(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>) {
// CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
// CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
- // NOTE: we return %arg0/%arg1 instead of the tied ones - this helps the ties
+ // NOTE: we util.return %arg0/%arg1 instead of the tied ones - this helps the ties
// get dropped early when they aren't needed.
- // CHECK-NEXT: return %[[ARG0]], %[[ARG0_D1]], %[[ARG0_D2]], %[[ARG1]], %[[ARG1_D0]]
- return %arg0, %arg1 : tensor<4x?x?x2xf32>, tensor<?xi32>
+ // CHECK-NEXT: util.return %[[ARG0]], %[[ARG0_D1]], %[[ARG0_D2]], %[[ARG1]], %[[ARG1_D0]]
+ util.return %arg0, %arg1 : tensor<4x?x?x2xf32>, tensor<?xi32>
}
// -----
@@ -82,13 +82,13 @@
// CHECK-LABEL: @caller
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xi32>, %[[ARG1_D0:.+]]: index)
-func.func @caller(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
+util.func private @caller(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
// CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
// CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
- // CHECK: %[[RET:.+]]:5 = call @callee(%[[ARG0]], %[[ARG0_D1]], %[[ARG0_D2]], %[[ARG1]], %[[ARG1_D0]])
+ // CHECK: %[[RET:.+]]:5 = util.call @callee(%[[ARG0]], %[[ARG0_D1]], %[[ARG0_D2]], %[[ARG1]], %[[ARG1_D0]])
// CHECK-SAME: (tensor<4x?x?x2xf32>, index, index, tensor<?xi32>, index) -> (tensor<4x?x?x2xf32>, index, index, tensor<?xi32>, index)
- %0:2 = call @callee(%arg0, %arg1) : (tensor<4x?x?x2xf32>, tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>)
+ %0:2 = util.call @callee(%arg0, %arg1) : (tensor<4x?x?x2xf32>, tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>)
// CHECK-NEXT: %[[TIED_RET0:.+]] = flow.tensor.tie_shape %[[RET]]#0 : tensor<4x?x?x2xf32>{%[[RET]]#1, %[[RET]]#2}
// CHECK-NEXT: %[[TIED_RET1:.+]] = flow.tensor.tie_shape %[[RET]]#3 : tensor<?xi32>{%[[RET]]#4}
@@ -98,10 +98,55 @@
// CHECK-NEXT: util.optimization_barrier %[[TIED_RET1]]
util.optimization_barrier %0#1 : tensor<?xi32>
- return
+ util.return
}
-func.func private @callee(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>)
+util.func private @callee(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) -> (tensor<4x?x?x2xf32>, tensor<?xi32>) {
+ util.return %arg0, %arg1 : tensor<4x?x?x2xf32>, tensor<?xi32>
+}
+
+// -----
+
+// Tests that tied operands are updated when we expand functions/calls.
+
+// CHECK-LABEL: @tiedCaller
+// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index,
+// CHECK-SAME: %[[ARG1:.+]]: tensor<?xi32>, %[[ARG1_D0:.+]]: index)
+util.func private @tiedCaller(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
+ // CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
+ // CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
+
+ // CHECK: %[[RET:.+]]:5 = util.call @tiedCallee(%[[ARG0]], %[[ARG0_D1]], %[[ARG0_D2]], %[[ARG1]], %[[ARG1_D0]])
+ // CHECK-SAME: (tensor<4x?x?x2xf32>, index, index, tensor<?xi32>, index) -> (tensor<4x?x?x2xf32>, index, index, %[[ARG1]], index)
+ %0:2 = util.call @tiedCallee(%arg0, %arg1) : (tensor<4x?x?x2xf32>, tensor<?xi32>) -> (tensor<4x?x?x2xf32>, %arg1)
+
+ // CHECK-NEXT: %[[TIED_RET0:.+]] = flow.tensor.tie_shape %[[RET]]#0 : tensor<4x?x?x2xf32>{%[[RET]]#1, %[[RET]]#2}
+ // CHECK-NEXT: %[[TIED_RET1:.+]] = flow.tensor.tie_shape %[[RET]]#3 : tensor<?xi32>{%[[RET]]#4}
+
+ // CHECK-NEXT: util.optimization_barrier %[[TIED_RET0]]
+ util.optimization_barrier %0#0 : tensor<4x?x?x2xf32>
+ // CHECK-NEXT: util.optimization_barrier %[[TIED_RET1]]
+ util.optimization_barrier %0#1 : tensor<?xi32>
+
+ util.return
+}
+
+// CHECK-LABEL: util.func private @tiedCallee
+// CHECK-SAME: (%[[CALLEE_ARG0:.+]]: tensor<4x?x?x2xf32>, %[[CALLEE_ARG0_D1:.+]]: index, %[[CALLEE_ARG0_D2:.+]]: index,
+// CHECK-SAME: %[[CALLEE_ARG1:.+]]: tensor<?xi32>, %[[CALLEE_ARG1_D0:.+]]: index)
+// CHECK-SAME: -> (tensor<4x?x?x2xf32>, index, index, %[[CALLEE_ARG1]], index)
+util.func private @tiedCallee(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) -> (tensor<4x?x?x2xf32>, %arg1) {
+ // CHECK-NEXT: %[[CALLEE_TIED_ARG0:.+]] = flow.tensor.tie_shape %[[CALLEE_ARG0]] : tensor<4x?x?x2xf32>{%[[CALLEE_ARG0_D1]], %[[CALLEE_ARG0_D2]]}
+ // CHECK-NEXT: %[[CALLEE_TIED_ARG1:.+]] = flow.tensor.tie_shape %[[CALLEE_ARG1]] : tensor<?xi32>{%[[CALLEE_ARG1_D0]]}
+
+ // CHECK-NEXT: util.optimization_barrier %[[CALLEE_TIED_ARG0]]
+ util.optimization_barrier %arg0 : tensor<4x?x?x2xf32>
+ // CHECK-NEXT: util.optimization_barrier %[[CALLEE_TIED_ARG1]]
+ util.optimization_barrier %arg1 : tensor<?xi32>
+
+ // CHECK-NEXT: util.return %[[CALLEE_ARG0]], %[[CALLEE_ARG0_D1]], %[[CALLEE_ARG0_D2]], %[[CALLEE_ARG1]], %[[CALLEE_ARG1_D0]]
+ util.return %arg0, %arg1 : tensor<4x?x?x2xf32>, tensor<?xi32>
+}
// -----
@@ -110,7 +155,7 @@
// CHECK-LABEL: @br
// CHECK-SAME: (%[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index,
// CHECK-SAME: %[[ARG1:.+]]: tensor<?xi32>, %[[ARG1_D0:.+]]: index)
-func.func @br(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
+util.func private @br(%arg0: tensor<4x?x?x2xf32>, %arg1: tensor<?xi32>) {
// CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
// CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
@@ -128,7 +173,7 @@
// CHECK-NEXT: util.optimization_barrier %[[TIED_BB1_ARG1]]
util.optimization_barrier %bb1_arg1 : tensor<?xi32>
- return
+ util.return
}
// -----
@@ -138,7 +183,7 @@
// CHECK-LABEL: @select
// CHECK-SAME: (%[[COND:.+]]: i1,
// CHECK-SAME: %[[ARG0:.+]]: tensor<4x?x?x2xf32>, %[[ARG0_D1:.+]]: index, %[[ARG0_D2:.+]]: index, %[[ARG1:.+]]: tensor<4x?x?x2xf32>, %[[ARG1_D1:.+]]: index, %[[ARG1_D2:.+]]: index)
-func.func @select(%cond: i1, %arg0: tensor<4x?x?x2xf32>, %arg1: tensor<4x?x?x2xf32>) {
+util.func private @select(%cond: i1, %arg0: tensor<4x?x?x2xf32>, %arg1: tensor<4x?x?x2xf32>) {
// CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
// CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<4x?x?x2xf32>{%[[ARG1_D1]], %[[ARG1_D2]]}
@@ -151,14 +196,14 @@
// CHECK-NEXT: util.optimization_barrier %[[SEL_TIED]]
util.optimization_barrier %0 : tensor<4x?x?x2xf32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @scf_while
// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>, %[[ARG1:.+]]: index, %[[ARG2:.+]]: i32
-func.func @scf_while(%arg0 : tensor<?xf32>, %arg1 : i32) {
+util.func private @scf_while(%arg0 : tensor<?xf32>, %arg1 : i32) {
%zero = arith.constant 0 : i32
%one = arith.constant 1 : i32
// CHECK: %[[TIE:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<?xf32>{%[[ARG1]]}
@@ -188,14 +233,14 @@
// CHECK: %[[TIE:.+]] = flow.tensor.tie_shape %[[WHILE]]#1 : tensor<?xf32>{%[[WHILE]]#2}
// CHECK: %[[BARRIER:.+]] = util.optimization_barrier %[[TIE]]
util.optimization_barrier %0#1 : tensor<?xf32>
- return
+ util.return
}
// -----
-// CHECK-LABEL: func.func @scf_if
+// CHECK-LABEL: @scf_if
// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>, %[[ARG1:.+]]: index, %[[ARG2:.+]]: i1
-func.func @scf_if(%arg0 : tensor<?xf32>, %arg1 : i1) {
+util.func private @scf_if(%arg0 : tensor<?xf32>, %arg1 : i1) {
// CHECK: %[[TIE:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<?xf32>{%[[ARG1]]}
// CHECK: %[[IF:.+]]:2 = scf.if %[[ARG2]] -> (tensor<?xf32>, index) {
%0 = scf.if %arg1 -> tensor<?xf32> {
@@ -213,5 +258,5 @@
// CHECK: %[[TIE:.+]] = flow.tensor.tie_shape %[[IF]]#0 : tensor<?xf32>{%[[IF]]#1}
// CHECK: %[[BARRIER:.+]] = util.optimization_barrier %[[TIE]] : tensor<?xf32>
util.optimization_barrier %0 : tensor<?xf32>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/flow_hoist_into_globals.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/flow_hoist_into_globals.mlir
index 332de0a..8cf38af 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/flow_hoist_into_globals.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/flow_hoist_into_globals.mlir
@@ -2,18 +2,18 @@
// CHECK-LABEL: @hoist_sub_byte_tensor_store
module @hoist_sub_byte_tensor_store {
- func.func @main() -> (tensor<64xi4>) {
+ util.func public @main() -> (tensor<64xi4>) {
%0 = arith.constant dense<3> : tensor<64xi32>
%2 = "iree_unregistered.const_expr"(%0) : (tensor<64xi32>) -> tensor<64xi4>
- return %2 : tensor<64xi4>
+ util.return %2 : tensor<64xi4>
}
}
// CHECK: util.global private @{{.*}} : tensor<32xi8>
-// CHECK: func.func @main() -> tensor<64xi4>
+// CHECK: util.func public @main() -> tensor<64xi4>
// CHECK: %[[GLOBAL_LD:.+]] = util.global.load @{{.*}} : tensor<32xi8>
// CHECK: %[[ORIG_VAL:.+]] = flow.tensor.bitcast %[[GLOBAL_LD]] : tensor<32xi8> -> tensor<64xi4>
-// CHECK: return %[[ORIG_VAL]]
+// CHECK: util.return %[[ORIG_VAL]]
// CHECK: util.initializer attributes {iree.compiler.consteval}
// CHECK: %[[CEXPR:.+]] = "iree_unregistered.const_expr"
@@ -30,21 +30,21 @@
// CHECK: util.global private @latent_global : tensor<8xi4>
util.global private @latent_global : tensor<8xi4>
- // CHECK: func.func @main
- func.func @main() -> (tensor<8xi4>, tensor<8xi4>, tensor<8xi4>) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (tensor<8xi4>, tensor<8xi4>, tensor<8xi4>) {
// CHECK-DAG: %[[LOAD_HOISTED_0:.*]] = util.global.load @[[HOISTED_0]] : tensor<4xi8>
// CHECK-DAG: %[[BITCAST_0:.*]] = flow.tensor.bitcast %[[LOAD_HOISTED_0]] : tensor<4xi8> -> tensor<8xi4>
// CHECK-DAG: %[[LOAD_HOISTED_1:.*]] = util.global.load @[[HOISTED_1]] : tensor<4xi8>
// CHECK-DAG: %[[BITCAST_1:.*]] = flow.tensor.bitcast %[[LOAD_HOISTED_1]] : tensor<4xi8> -> tensor<8xi4>
// CHECK-DAG: %[[RESULT:.*]] = "iree_unregistered.var_expr"(%[[BITCAST_1]])
- // CHECK: return %[[BITCAST_0]], %[[BITCAST_1]], %[[RESULT]]
+ // CHECK: util.return %[[BITCAST_0]], %[[BITCAST_1]], %[[RESULT]]
%0 = arith.constant dense<0> : tensor<8xi4>
%1 = arith.constant dense<1> : tensor<8xi4>
%2 = "iree_unregistered.const_expr"(%0, %1) : (tensor<8xi4>, tensor<8xi4>) -> tensor<8xi4>
%3 = util.global.load @latent_global : tensor<8xi4>
%4 = "iree_unregistered.const_expr"(%2, %3) : (tensor<8xi4>, tensor<8xi4>) -> tensor<8xi4>
%5 = "iree_unregistered.var_expr"(%4) : (tensor<8xi4>) -> tensor<8xi4>
- return %2, %4, %5 : tensor<8xi4>, tensor<8xi4>, tensor<8xi4>
+ util.return %2, %4, %5 : tensor<8xi4>, tensor<8xi4>, tensor<8xi4>
}
// CHECK: util.initializer attributes {iree.compiler.consteval} {
// CHECK: %[[C0:.*]] = arith.constant dense<0> : tensor<8xi4>
@@ -70,10 +70,10 @@
// CHECK-LABEL: @hoist_sub_byte_tensor_transitive
// CHECK: util.global
module @hoist_sub_byte_tensor_transitive {
- func.func @main() -> (i32) {
+ util.func public @main() -> (i32) {
%0 = arith.constant dense<3> : tensor<i4>
%2 = "iree_unregistered.const_expr"(%0) : (tensor<i4>) -> i32
- return %2 : i32
+ util.return %2 : i32
}
}
// We do not need to cast for transitive sub-byte values.
@@ -85,9 +85,9 @@
// CHECK-LABEL: @do_not_hoist_metadata_leaf
// CHECK-NOT: util.global
module @do_not_hoist_metadata_leaf {
- func.func @main() -> (tensor<1xi32>) {
+ util.func public @main() -> (tensor<1xi32>) {
%0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi8>
%1 = flow.tensor.bitcast %0 : tensor<4xi8> -> tensor<1xi32>
- return %1 : tensor<1xi32>
+ util.return %1 : tensor<1xi32>
}
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
index 2b59689..d276a46 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_dequantization_matmul.mlir
@@ -1,38 +1,36 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-global-opt-fuse-dequantization-matmul{enable-quantized-matmul-reassociation=true},canonicalize))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-global-opt-fuse-dequantization-matmul{enable-quantized-matmul-reassociation=true},canonicalize))" %s | FileCheck %s
-module {
- func.func @grouped_quantized_matmul_reassociate(%arg0: tensor<11008x32x128xi4>, %arg1: tensor<32x128xf32>, %arg2: tensor<11008x32xf32>, %arg3: tensor<11008x32xf32>) -> tensor<11008xf32> {
- %cst = arith.constant 0.000000e+00 : f32
- %0 = tensor.empty() : tensor<11008xf32>
- %1 = tensor.empty() : tensor<11008x32x128xf32>
- %2 = linalg.fill ins(%cst : f32) outs(%0 : tensor<11008xf32>) -> tensor<11008xf32>
- %3 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
- ins(%arg0, %arg2, %arg3 : tensor<11008x32x128xi4>, tensor<11008x32xf32>, tensor<11008x32xf32>) outs(%1 : tensor<11008x32x128xf32>) {
- ^bb0(%in: i4, %in_0: f32, %in_1: f32, %out: f32):
- %5 = arith.extui %in : i4 to i32
- %6 = arith.uitofp %5 : i32 to f32
- %7 = arith.subf %6, %in_1 : f32
- %8 = arith.mulf %7, %in_0 : f32
- linalg.yield %8 : f32
- } -> tensor<11008x32x128xf32>
- %4 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0)>],
- iterator_types = ["parallel", "reduction", "reduction"]}
- ins(%arg1, %3 : tensor<32x128xf32>, tensor<11008x32x128xf32>) outs(%2 : tensor<11008xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %5 = arith.mulf %in, %in_0 : f32
- %6 = arith.addf %5, %out : f32
- linalg.yield %6 : f32
- } -> tensor<11008xf32>
- return %4 : tensor<11008xf32>
- }
+util.func public @grouped_quantized_matmul_reassociate(%arg0: tensor<11008x32x128xi4>, %arg1: tensor<32x128xf32>, %arg2: tensor<11008x32xf32>, %arg3: tensor<11008x32xf32>) -> tensor<11008xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty() : tensor<11008xf32>
+ %1 = tensor.empty() : tensor<11008x32x128xf32>
+ %2 = linalg.fill ins(%cst : f32) outs(%0 : tensor<11008xf32>) -> tensor<11008xf32>
+ %3 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+ iterator_types = ["parallel", "parallel", "parallel"]}
+ ins(%arg0, %arg2, %arg3 : tensor<11008x32x128xi4>, tensor<11008x32xf32>, tensor<11008x32xf32>) outs(%1 : tensor<11008x32x128xf32>) {
+ ^bb0(%in: i4, %in_0: f32, %in_1: f32, %out: f32):
+ %5 = arith.extui %in : i4 to i32
+ %6 = arith.uitofp %5 : i32 to f32
+ %7 = arith.subf %6, %in_1 : f32
+ %8 = arith.mulf %7, %in_0 : f32
+ linalg.yield %8 : f32
+ } -> tensor<11008x32x128xf32>
+ %4 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0)>],
+ iterator_types = ["parallel", "reduction", "reduction"]}
+ ins(%arg1, %3 : tensor<32x128xf32>, tensor<11008x32x128xf32>) outs(%2 : tensor<11008xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %5 = arith.mulf %in, %in_0 : f32
+ %6 = arith.addf %5, %out : f32
+ linalg.yield %6 : f32
+ } -> tensor<11008xf32>
+ util.return %4 : tensor<11008xf32>
}
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d0)>
@@ -41,7 +39,7 @@
// CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d1)>
-// CHECK: func.func @grouped_quantized_matmul_reassociate(
+// CHECK: util.func public @grouped_quantized_matmul_reassociate(
// CHECK-SAME: %[[QUANT:[a-zA-Z0-9_]+]]: tensor<11008x32x128xi4>
// CHECK-SAME: %[[UNQUANT:[a-zA-Z0-9_]+]]: tensor<32x128xf32>
// CHECK-SAME: %[[SCALES:[a-zA-Z0-9_]+]]: tensor<11008x32xf32>
@@ -122,43 +120,41 @@
// CHECK: %[[RESUBF:.+]] = arith.subf %[[REMULF1]], %[[REMULF3]] : f32
// CHECK: %[[READDF:.+]] = arith.addf %[[RESUBF]], %[[REOUT0]] : f32
// CHECK: linalg.yield %[[READDF]] : f32
-// CHECK: return %[[GENREASSOCIATE]]
+// CHECK: util.return %[[GENREASSOCIATE]]
// -----
-module {
- func.func @grouped_quantized_matmul_reassociate_f16(%arg0: tensor<11008x32x128xi4>, %arg1: tensor<32x128xf16>, %arg2: tensor<11008x32xf16>, %arg3: tensor<11008x32xf16>) -> tensor<11008xf16> {
- %cst = arith.constant 0.000000e+00 : f16
- %0 = tensor.empty() : tensor<11008xf16>
- %1 = tensor.empty() : tensor<11008x32x128xf16>
- %2 = linalg.fill ins(%cst : f16) outs(%0 : tensor<11008xf16>) -> tensor<11008xf16>
- %3 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
- ins(%arg0, %arg2, %arg3 : tensor<11008x32x128xi4>, tensor<11008x32xf16>, tensor<11008x32xf16>) outs(%1 : tensor<11008x32x128xf16>) {
- ^bb0(%in: i4, %in_0: f16, %in_1: f16, %out: f16):
- %5 = arith.extui %in : i4 to i32
- %6 = arith.uitofp %5 : i32 to f16
- %7 = arith.subf %6, %in_1 : f16
- %8 = arith.mulf %7, %in_0 : f16
- linalg.yield %8 : f16
- } -> tensor<11008x32x128xf16>
- %4 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0)>],
- iterator_types = ["parallel", "reduction", "reduction"]}
- ins(%arg1, %3 : tensor<32x128xf16>, tensor<11008x32x128xf16>) outs(%2 : tensor<11008xf16>) {
- ^bb0(%in: f16, %in_0: f16, %out: f16):
- %5 = arith.mulf %in, %in_0 : f16
- %6 = arith.addf %5, %out : f16
- linalg.yield %6 : f16
- } -> tensor<11008xf16>
- return %4 : tensor<11008xf16>
- }
+util.func public @grouped_quantized_matmul_reassociate_f16(%arg0: tensor<11008x32x128xi4>, %arg1: tensor<32x128xf16>, %arg2: tensor<11008x32xf16>, %arg3: tensor<11008x32xf16>) -> tensor<11008xf16> {
+ %cst = arith.constant 0.000000e+00 : f16
+ %0 = tensor.empty() : tensor<11008xf16>
+ %1 = tensor.empty() : tensor<11008x32x128xf16>
+ %2 = linalg.fill ins(%cst : f16) outs(%0 : tensor<11008xf16>) -> tensor<11008xf16>
+ %3 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+ iterator_types = ["parallel", "parallel", "parallel"]}
+ ins(%arg0, %arg2, %arg3 : tensor<11008x32x128xi4>, tensor<11008x32xf16>, tensor<11008x32xf16>) outs(%1 : tensor<11008x32x128xf16>) {
+ ^bb0(%in: i4, %in_0: f16, %in_1: f16, %out: f16):
+ %5 = arith.extui %in : i4 to i32
+ %6 = arith.uitofp %5 : i32 to f16
+ %7 = arith.subf %6, %in_1 : f16
+ %8 = arith.mulf %7, %in_0 : f16
+ linalg.yield %8 : f16
+ } -> tensor<11008x32x128xf16>
+ %4 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+ affine_map<(d0, d1, d2) -> (d0)>],
+ iterator_types = ["parallel", "reduction", "reduction"]}
+ ins(%arg1, %3 : tensor<32x128xf16>, tensor<11008x32x128xf16>) outs(%2 : tensor<11008xf16>) {
+ ^bb0(%in: f16, %in_0: f16, %out: f16):
+ %5 = arith.mulf %in, %in_0 : f16
+ %6 = arith.addf %5, %out : f16
+ linalg.yield %6 : f16
+ } -> tensor<11008xf16>
+ util.return %4 : tensor<11008xf16>
}
// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d0, d1)>
@@ -168,7 +164,7 @@
// CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d1)>
-// CHECK: func.func @grouped_quantized_matmul_reassociate_f16(
+// CHECK: util.func public @grouped_quantized_matmul_reassociate_f16(
// CHECK-SAME: %[[QUANT:[a-zA-Z0-9_]+]]: tensor<11008x32x128xi4>
// CHECK-SAME: %[[UNQUANT:[a-zA-Z0-9_]+]]: tensor<32x128xf16>
// CHECK-SAME: %[[SCALES:[a-zA-Z0-9_]+]]: tensor<11008x32xf16>
@@ -249,4 +245,4 @@
// CHECK: %[[RESUBF:.+]] = arith.subf %[[REMULF1]], %[[REMULF3]] : f16
// CHECK: %[[READDF:.+]] = arith.addf %[[RESUBF]], %[[REOUT0]] : f16
// CHECK: linalg.yield %[[READDF]] : f16
-// CHECK: return %[[GENREASSOCIATE]]
+// CHECK: util.return %[[GENREASSOCIATE]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_silu_horizontal_matmul.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_silu_horizontal_matmul.mlir
index 9aecd84..7afea1d 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/fuse_silu_horizontal_matmul.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/fuse_silu_horizontal_matmul.mlir
@@ -1,40 +1,38 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-global-opt-fuse-silu-horizontal-matmul,canonicalize))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-global-opt-fuse-silu-horizontal-matmul,canonicalize))" %s | FileCheck %s
#map = affine_map<(d0, d1) -> (d0, d1)>
-module {
- func.func @silu_horizontal_matmul_fusion(%arg0: index, %arg1: tensor<?x5120xf16>, %arg2: tensor<13824x5120xf16>, %arg3: tensor<13824x5120xf16>) -> tensor<?x13824xf16> {
- %cst = arith.constant 1.000000e+00 : f16
- %cst_0 = arith.constant 0.000000e+00 : f16
- %0 = tensor.empty(%arg0) : tensor<?x13824xf16>
- %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
- %2 = linalg.matmul_transpose_b ins(%arg1, %arg2 : tensor<?x5120xf16>, tensor<13824x5120xf16>) outs(%1 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
- %3 = tensor.empty(%arg0) : tensor<?x13824xf16>
- %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<?x13824xf16>) outs(%3 : tensor<?x13824xf16>) {
- ^bb0(%in: f16, %out: f16):
- %10 = arith.negf %in : f16
- %11 = math.exp %10 : f16
- %12 = arith.addf %11, %cst_0 : f16
- %13 = arith.divf %cst_0, %12 : f16
- linalg.yield %13 : f16
- } -> tensor<?x13824xf16>
- %5 = tensor.empty(%arg0) : tensor<?x13824xf16>
- %6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%4, %2 : tensor<?x13824xf16>, tensor<?x13824xf16>) outs(%5 : tensor<?x13824xf16>) {
- ^bb0(%in: f16, %in_1: f16, %out: f16):
- %10 = arith.mulf %in, %in_1 : f16
- linalg.yield %10 : f16
- } -> tensor<?x13824xf16>
- %7 = linalg.matmul_transpose_b ins(%arg1, %arg3 : tensor<?x5120xf16>, tensor<13824x5120xf16>) outs(%1 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
- %8 = tensor.empty(%arg0) : tensor<?x13824xf16>
- %9 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%6, %7 : tensor<?x13824xf16>, tensor<?x13824xf16>) outs(%8 : tensor<?x13824xf16>) {
- ^bb0(%in: f16, %in_1: f16, %out: f16):
- %10 = arith.mulf %in, %in_1 : f16
- linalg.yield %10 : f16
- } -> tensor<?x13824xf16>
- return %9 : tensor<?x13824xf16>
- }
+util.func public @silu_horizontal_matmul_fusion(%arg0: index, %arg1: tensor<?x5120xf16>, %arg2: tensor<13824x5120xf16>, %arg3: tensor<13824x5120xf16>) -> tensor<?x13824xf16> {
+ %cst = arith.constant 1.000000e+00 : f16
+ %cst_0 = arith.constant 0.000000e+00 : f16
+ %0 = tensor.empty(%arg0) : tensor<?x13824xf16>
+ %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
+ %2 = linalg.matmul_transpose_b ins(%arg1, %arg2 : tensor<?x5120xf16>, tensor<13824x5120xf16>) outs(%1 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
+ %3 = tensor.empty(%arg0) : tensor<?x13824xf16>
+ %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<?x13824xf16>) outs(%3 : tensor<?x13824xf16>) {
+ ^bb0(%in: f16, %out: f16):
+ %10 = arith.negf %in : f16
+ %11 = math.exp %10 : f16
+ %12 = arith.addf %11, %cst_0 : f16
+ %13 = arith.divf %cst_0, %12 : f16
+ linalg.yield %13 : f16
+ } -> tensor<?x13824xf16>
+ %5 = tensor.empty(%arg0) : tensor<?x13824xf16>
+ %6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%4, %2 : tensor<?x13824xf16>, tensor<?x13824xf16>) outs(%5 : tensor<?x13824xf16>) {
+ ^bb0(%in: f16, %in_1: f16, %out: f16):
+ %10 = arith.mulf %in, %in_1 : f16
+ linalg.yield %10 : f16
+ } -> tensor<?x13824xf16>
+ %7 = linalg.matmul_transpose_b ins(%arg1, %arg3 : tensor<?x5120xf16>, tensor<13824x5120xf16>) outs(%1 : tensor<?x13824xf16>) -> tensor<?x13824xf16>
+ %8 = tensor.empty(%arg0) : tensor<?x13824xf16>
+ %9 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%6, %7 : tensor<?x13824xf16>, tensor<?x13824xf16>) outs(%8 : tensor<?x13824xf16>) {
+ ^bb0(%in: f16, %in_1: f16, %out: f16):
+ %10 = arith.mulf %in, %in_1 : f16
+ linalg.yield %10 : f16
+ } -> tensor<?x13824xf16>
+ util.return %9 : tensor<?x13824xf16>
}
// CHECK-DAG: #[[MAP:[a-zA-Z0-9]+]] = affine_map<(d0, d1) -> (d0, d1)>
-// CHECK: func.func @silu_horizontal_matmul_fusion(
+// CHECK: util.func public @silu_horizontal_matmul_fusion(
// CHECK-SAME: %[[IN0:.+]]: index,
// CHECK-SAME: %[[IN1:.+]]: tensor<?x5120xf16>,
// CHECK-SAME: %[[IN2:.+]]: tensor<13824x5120xf16>,
@@ -70,4 +68,4 @@
// CHECK: } -> tensor<?x13824xf16>
// CHECK: flow.return %[[OUTPUT]] : tensor<?x13824xf16>
// CHECK: }
-// CHECK: return %[[DISPATCH]] : tensor<?x13824xf16>
+// CHECK: util.return %[[DISPATCH]] : tensor<?x13824xf16>
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/generalize_named_ops.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/generalize_named_ops.mlir
index 2f97a65..b3413b3 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/generalize_named_ops.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/generalize_named_ops.mlir
@@ -1,6 +1,6 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-global-opt-generalize-linalg-named-ops))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-global-opt-generalize-linalg-named-ops))" --split-input-file %s | FileCheck %s
-func.func @generalize_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @generalize_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
@@ -8,15 +8,15 @@
%empty = tensor.empty(%d0, %d1): tensor<?x?xf32>
%add = linalg.add ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%empty : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %add : tensor<?x?xf32>
+ util.return %add : tensor<?x?xf32>
}
-// CHECK-LABEL: func @generalize_op
+// CHECK-LABEL: util.func public @generalize_op
// CHECK: %[[GENERIC:.+]] = linalg.generic
-// CHECK: return %[[GENERIC]]
+// CHECK: util.return %[[GENERIC]]
// -----
-func.func @no_generalize_op_within_dispatch(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+util.func public @no_generalize_op_within_dispatch(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
@@ -27,11 +27,11 @@
outs(%empty : tensor<?x?xf32>) -> tensor<?x?xf32>
flow.return %add : tensor<?x?xf32>
}
- return %dispatch : tensor<?x?xf32>
+ util.return %dispatch : tensor<?x?xf32>
}
-// CHECK-LABEL: func @no_generalize_op_within_dispatch
+// CHECK-LABEL: util.func public @no_generalize_op_within_dispatch
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.region
// CHECK: %[[ADD:.+]] = linalg.add
// CHECK: flow.return %[[ADD]]
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals_linalg.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals_linalg.mlir
index c934b30..5724d48 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals_linalg.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals_linalg.mlir
@@ -6,8 +6,8 @@
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module @compute_hoisted {
// CHECK: util.global private @[[HOISTED:.*]] : tensor<5x6xf32>
- // CHECK: func.func @main
- func.func @main() -> (tensor<5x6xf32>) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (tensor<5x6xf32>) {
%cst_0 = arith.constant dense<1.270000e+02> : tensor<f32>
// A non-leaf broadcast.
@@ -26,8 +26,8 @@
} -> tensor<5x6xf32>
// CHECK: %[[RESULT:.*]] = util.global.load @[[HOISTED]] : tensor<5x6xf32>
- // CHECK: return %[[RESULT]]
- return %3 : tensor<5x6xf32>
+ // CHECK: util.return %[[RESULT]]
+ util.return %3 : tensor<5x6xf32>
}
// CHECK: util.initializer
}
@@ -41,8 +41,8 @@
#map1 = affine_map<(d0, d1) -> (d0, d1)>
module @broadcast_treated_as_leaf {
// CHECK-NOT: util.global
- // CHECK: func.func @main
- func.func @main() -> (tensor<5x6xf32>) {
+ // CHECK: util.func public @main
+ util.func public @main() -> (tensor<5x6xf32>) {
%cst_0 = arith.constant dense<1.270000e+02> : tensor<f32>
// CHECK: tensor.empty()
%0 = tensor.empty() : tensor<5x6xf32>
@@ -52,8 +52,8 @@
^bb0(%arg1: f32, %arg2: f32): // no predecessors
linalg.yield %arg1 : f32
} -> tensor<5x6xf32>
- // CHECK: return
- return %1 : tensor<5x6xf32>
+ // CHECK: util.return
+ util.return %1 : tensor<5x6xf32>
}
// CHECK-NOT: util.initializer
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/infer_numeric_narrowing.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/infer_numeric_narrowing.mlir
index 025a981..3728f74 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/infer_numeric_narrowing.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/infer_numeric_narrowing.mlir
@@ -7,7 +7,7 @@
// Checks as a by-product:
// - Infering ui0 for [0, 0] range
// - Infering unsigned for >= 0 range
-func.func @probe_linalg_op(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
+util.func public @probe_linalg_op(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
// CHECK-DAG: %[[RHS:.*]] = arith.constant dense
// CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: util.numeric.optional_narrow %[[ZERO]] : f32 as ui0
@@ -20,56 +20,56 @@
%0 = tensor.empty() : tensor<5x1xf32>
%1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @infer_symmetric_signed
// CHECK: util.numeric.optional_narrow %{{.*}} : tensor<3x1xf32> as si8 {max_value = 127 : si8, min_value = -39 : si8}
-func.func @infer_symmetric_signed(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
+util.func public @infer_symmetric_signed(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
%rhs = arith.constant dense<
[[-3.900000e+01], [0.000000e+00], [1.270000e+02]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<5x1xf32>
%1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @infer_i1_signed
// Signed i1 is a silly boundary condition worth checking.
// CHECK: util.numeric.optional_narrow %{{.*}} : tensor<3x1xf32> as si1 {max_value = 0 : si1, min_value = -1 : si1}
-func.func @infer_i1_signed(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
+util.func public @infer_i1_signed(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
%rhs = arith.constant dense<
[[0.000000e+00], [0.000000e+00], [-1.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<5x1xf32>
%1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @infer_positive_non_straddling_zero
// A range that does not straddle zero is a special case in the code.
// CHECK: util.numeric.optional_narrow %{{.*}} : tensor<3x1xf32> as ui2 {max_value = 2 : ui2, min_value = 1 : ui2}
-func.func @infer_positive_non_straddling_zero(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
+util.func public @infer_positive_non_straddling_zero(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
%rhs = arith.constant dense<
[[1.000000e+00], [1.000000e+00], [2.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<5x1xf32>
%1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @infer_negative_non_straddling_zero
// A range that does not straddle zero is a special case in the code.
// CHECK: util.numeric.optional_narrow %{{.*}} : tensor<3x1xf32> as si2 {max_value = -1 : si2, min_value = -2 : si2}
-func.func @infer_negative_non_straddling_zero(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
+util.func public @infer_negative_non_straddling_zero(%arg0 : tensor<5x3xf32>) -> tensor<5x1xf32> {
%rhs = arith.constant dense<
[[-1.000000e+00], [-1.000000e+00], [-2.000000e+00]]> : tensor<3x1xf32>
%init_value = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<5x1xf32>
%1 = linalg.fill ins(%init_value : f32) outs(%0 : tensor<5x1xf32>) -> tensor<5x1xf32>
%2 = linalg.matmul ins(%arg0, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%1 : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir
index 009a211..61b2572 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir
@@ -7,7 +7,7 @@
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#executable_target_embedded_elf_x86_64_]}>
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
- func.func @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -22,10 +22,10 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%3 = iree_linalg_ext.set_encoding %padded : tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>>
%4 = iree_linalg_ext.unset_encoding %3 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>> -> tensor<?x?xf32>
- return %4 : tensor<?x?xf32>
+ util.return %4 : tensor<?x?xf32>
}
}
-// CHECK-LABEL: func.func @lhs_encoding
+// CHECK-LABEL: util.func public @lhs_encoding
// CHECK: tensor.pack
// CHECK: tensor.unpack
@@ -38,7 +38,7 @@
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
#device_target_vulkan = #hal.device.target<"vulkan", {executable_targets = [#executable_target_vulkan_spirv_fb], legacy_sync}>
module attributes {hal.device.targets = [#device_target_vulkan]} {
- func.func @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -53,14 +53,14 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%3 = iree_linalg_ext.set_encoding %padded : tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>>
%4 = iree_linalg_ext.unset_encoding %3 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>> -> tensor<?x?xf32>
- return %4 : tensor<?x?xf32>
+ util.return %4 : tensor<?x?xf32>
}
}
// vulkan uses default materialization patterns which unsets the encodings.
-// CHECK-LABEL: func.func @lhs_encoding
+// CHECK-LABEL: util.func public @lhs_encoding
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
-// CHECK: return %[[ARG0]]
+// CHECK: util.return %[[ARG0]]
// -----
@@ -73,7 +73,7 @@
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb">
#device_target_vulkan = #hal.device.target<"vulkan", {executable_targets = [#executable_target_vulkan_spirv_fb], legacy_sync}>
module attributes {hal.device.targets = [#device_target_vulkan, #device_target_llvm_cpu]} {
- func.func @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -88,11 +88,11 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%3 = iree_linalg_ext.set_encoding %padded : tensor<?x?xf32> -> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>>
%4 = iree_linalg_ext.unset_encoding %3 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3]>> -> tensor<?x?xf32>
- return %4 : tensor<?x?xf32>
+ util.return %4 : tensor<?x?xf32>
}
}
// Multiple targets are currently unsupported.
-// CHECK-LABEL: func.func @lhs_encoding
+// CHECK-LABEL: util.func public @lhs_encoding
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
-// CHECK: return %[[ARG0]]
+// CHECK: util.return %[[ARG0]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/optimize_numerics.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/optimize_numerics.mlir
index 9d002cb..f31bb33 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/optimize_numerics.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/optimize_numerics.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --iree-global-opt-optimize-numerics %s | FileCheck %s
// CHECK-LABEL: @matmul_i8_i8_i32_unsigned
-func.func @matmul_i8_i8_i32_unsigned(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
+util.func public @matmul_i8_i8_i32_unsigned(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
// CHECK: %[[LHS:.*]] = arith.fptoui %arg0 : tensor<5x3xf32> to tensor<5x3xi8>
// CHECK: %[[RHS:.*]] = arith.fptoui %arg1 : tensor<3x1xf32> to tensor<3x1xi8>
// CHECK: %[[INIT:.*]] = arith.fptoui %arg2 : tensor<5x1xf32> to tensor<5x1xi32>
@@ -11,11 +11,11 @@
// CHECK: %[[RESULT:.*]] = linalg.matmul_unsigned ins(%[[LHS]], %[[RHS]] : tensor<5x3xi8>, tensor<3x1xi8>) outs(%[[INIT]] : tensor<5x1xi32>)
%2 = linalg.matmul ins(%lhs, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%init : tensor<5x1xf32>) -> tensor<5x1xf32>
// CHECK: arith.uitofp %[[RESULT]] : tensor<5x1xi32> to tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @matmul_i8_i8_i32_signed
-func.func @matmul_i8_i8_i32_signed(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
+util.func public @matmul_i8_i8_i32_signed(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
// CHECK: %[[LHS:.*]] = arith.fptosi %arg0 : tensor<5x3xf32> to tensor<5x3xi8>
// CHECK: %[[RHS:.*]] = arith.fptosi %arg1 : tensor<3x1xf32> to tensor<3x1xi8>
// CHECK: %[[INIT:.*]] = arith.fptosi %arg2 : tensor<5x1xf32> to tensor<5x1xi32>
@@ -25,12 +25,12 @@
// CHECK: %[[RESULT:.*]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<5x3xi8>, tensor<3x1xi8>) outs(%[[INIT]] : tensor<5x1xi32>)
%2 = linalg.matmul ins(%lhs, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%init : tensor<5x1xf32>) -> tensor<5x1xf32>
// CHECK: arith.sitofp %[[RESULT]] : tensor<5x1xi32> to tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @matmul_i4_i4_i32_signed
// For now we clamp this to i8
-func.func @matmul_i4_i4_i32_signed(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
+util.func public @matmul_i4_i4_i32_signed(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
// CHECK: %[[LHS:.*]] = arith.fptosi %arg0 : tensor<5x3xf32> to tensor<5x3xi8>
// CHECK: %[[RHS:.*]] = arith.fptosi %arg1 : tensor<3x1xf32> to tensor<3x1xi8>
// CHECK: %[[INIT:.*]] = arith.fptosi %arg2 : tensor<5x1xf32> to tensor<5x1xi32>
@@ -40,38 +40,38 @@
// CHECK: %[[RESULT:.*]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<5x3xi8>, tensor<3x1xi8>) outs(%[[INIT]] : tensor<5x1xi32>)
%2 = linalg.matmul ins(%lhs, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%init : tensor<5x1xf32>) -> tensor<5x1xf32>
// CHECK: arith.sitofp %[[RESULT]] : tensor<5x1xi32> to tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @matmul_reject_gt_8bit
// We may relax this restriction at some point but for right now we have it
// because less analysis is needed to prove safety.
// CHECK-NOT: fptosi
-func.func @matmul_reject_gt_8bit(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
+util.func public @matmul_reject_gt_8bit(%arg0 : tensor<5x3xf32>, %arg1 : tensor<3x1xf32>, %arg2 : tensor<5x1xf32>) -> tensor<5x1xf32> {
%lhs = util.numeric.optional_narrow %arg0 : tensor<5x3xf32> as ui9 {max_value = 312 : ui9, min_value = 0 : ui9}
%rhs = util.numeric.optional_narrow %arg1 : tensor<3x1xf32> as si8 {max_value = 127 : si8, min_value = -127 : si8}
%init = util.numeric.optional_narrow %arg2 : tensor<5x1xf32> as ui0
// CHECK: linalg.matmul {{.*}} -> tensor<5x1xf32>
%2 = linalg.matmul ins(%lhs, %rhs : tensor<5x3xf32>, tensor<3x1xf32>) outs(%init : tensor<5x1xf32>) -> tensor<5x1xf32>
- return %2 : tensor<5x1xf32>
+ util.return %2 : tensor<5x1xf32>
}
// CHECK-LABEL: @cast_fill
-func.func @cast_fill(%arg0 : f32, %arg1 : tensor<3xf32>) -> tensor<3xi8> {
+util.func public @cast_fill(%arg0 : f32, %arg1 : tensor<3xf32>) -> tensor<3xi8> {
// CHECK: %[[SCALAR:.*]] = arith.fptosi %arg0 : f32 to i8
// CHECK: %[[INIT:.*]] = arith.fptosi %arg1 : tensor<3xf32> to tensor<3xi8>
// CHECK: %[[RESULT:.*]] = linalg.fill ins(%[[SCALAR]] : i8) outs(%[[INIT]] : tensor<3xi8>) -> tensor<3xi8>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = linalg.fill ins(%arg0 : f32) outs(%arg1 : tensor<3xf32>) -> tensor<3xf32>
%1 = arith.fptosi %0 : tensor<3xf32> to tensor<3xi8>
- return %1 : tensor<3xi8>
+ util.return %1 : tensor<3xi8>
}
// CHECK-LABEL: @cast_init
-func.func @cast_init() -> tensor<5x9xi8> {
+util.func public @cast_init() -> tensor<5x9xi8> {
// CHECK: %[[RESULT:.*]] = tensor.empty() : tensor<5x9xi8>
- // CHECK: return %[[RESULT]]
+ // CHECK: util.return %[[RESULT]]
%0 = tensor.empty() : tensor<5x9xf32>
%1 = arith.fptosi %0 : tensor<5x9xf32> to tensor<5x9xi8>
- return %1 : tensor<5x9xi8>
+ util.return %1 : tensor<5x9xi8>
}
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/propagate_linalg_transpose.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/propagate_linalg_transpose.mlir
index b1f0d8f..05f5d0b 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/propagate_linalg_transpose.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/propagate_linalg_transpose.mlir
@@ -1,97 +1,97 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-global-opt-propagate-linalg-transpose))" --split-input-file %s | FileCheck %s
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-global-opt-propagate-linalg-transpose{enable-aggressive-propagation=true}))" --split-input-file %s | FileCheck %s --check-prefix=APROP
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-global-opt-propagate-linalg-transpose{test-sinking-only=true}))" --split-input-file %s | FileCheck %s --check-prefix=SINK
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-global-opt-propagate-linalg-transpose))" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-global-opt-propagate-linalg-transpose{enable-aggressive-propagation=true}))" --split-input-file %s | FileCheck %s --check-prefix=APROP
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-global-opt-propagate-linalg-transpose{test-sinking-only=true}))" --split-input-file %s | FileCheck %s --check-prefix=SINK
-func.func @specialize_transpose_op(%arg0 : tensor<1x2x3xf32>,
+util.func public @specialize_transpose_op(%arg0 : tensor<1x2x3xf32>,
%empty : tensor<3x2x1xf32>) -> tensor<3x2x1xf32> {
%transposed = linalg.generic {indexing_maps = [
affine_map<(d0, d1, d2) -> (d0, d2, d1)>,
affine_map<(d0, d1, d2) -> (d1, d2, d0)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
+ iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<1x2x3xf32>)
outs(%empty : tensor<3x2x1xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<3x2x1xf32>
- return %transposed : tensor<3x2x1xf32>
+ util.return %transposed : tensor<3x2x1xf32>
}
-// CHECK-LABEL: func @specialize_transpose_op
+// CHECK-LABEL: util.func public @specialize_transpose_op
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose
// CHECK-SAME: permutation = [2, 1, 0]
-// CHECK: return %[[TRANSPOSE]]
+// CHECK: util.return %[[TRANSPOSE]]
// -----
-func.func @specialize_non_involution_transpose_op(%arg0 : tensor<1x2x3xf32>,
+util.func public @specialize_non_involution_transpose_op(%arg0 : tensor<1x2x3xf32>,
%empty : tensor<2x3x1xf32>) -> tensor<2x3x1xf32> {
%transposed = linalg.generic {indexing_maps = [
affine_map<(d0, d1, d2) -> (d2, d0, d1)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
- iterator_types = ["parallel", "parallel", "parallel"]}
+ iterator_types = ["parallel", "parallel", "parallel"]}
ins(%arg0 : tensor<1x2x3xf32>)
outs(%empty : tensor<2x3x1xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<2x3x1xf32>
- return %transposed : tensor<2x3x1xf32>
+ util.return %transposed : tensor<2x3x1xf32>
}
-// CHECK-LABEL: func @specialize_non_involution_transpose_op
+// CHECK-LABEL: util.func public @specialize_non_involution_transpose_op
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose
// CHECK-SAME: permutation = [1, 2, 0]
-// CHECK: return %[[TRANSPOSE]]
+// CHECK: util.return %[[TRANSPOSE]]
// -----
-func.func @propagate_through_extract_slice(%arg0 : tensor<1x256x128xf32>) -> tensor<1x128x32xf32> {
+util.func public @propagate_through_extract_slice(%arg0 : tensor<1x256x128xf32>) -> tensor<1x128x32xf32> {
%empty = tensor.empty(): tensor<1x128x256xf32>
%transposed = linalg.transpose ins(%arg0 : tensor<1x256x128xf32>)
outs(%empty : tensor<1x128x256xf32>) permutation = [0, 2, 1]
%slice = tensor.extract_slice %transposed[0, 0, 0] [1, 128, 32] [1, 1, 1] : tensor<1x128x256xf32> to tensor<1x128x32xf32>
- return %slice : tensor<1x128x32xf32>
+ util.return %slice : tensor<1x128x32xf32>
}
-// CHECK-LABEL: func @propagate_through_extract_slice
+// CHECK-LABEL: util.func public @propagate_through_extract_slice
// CHECK: %[[SLICE:.+]] = tensor.extract_slice {{.*}}[0, 0, 0] [1, 32, 128] [1, 1, 1]
// CHECK-SAME: tensor<1x256x128xf32> to tensor<1x32x128xf32>
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[SLICE]] : tensor<1x32x128xf32>)
// CHECK-SAME: permutation = [0, 2, 1]
-// CHECK: return %[[TRANSPOSE]]
+// CHECK: util.return %[[TRANSPOSE]]
// -----
-func.func @propagate_through_rank_reduced_extract_slice(%arg0 : tensor<1x256x1x128x1xf32>) -> tensor<128x32xf32> {
+util.func public @propagate_through_rank_reduced_extract_slice(%arg0 : tensor<1x256x1x128x1xf32>) -> tensor<128x32xf32> {
%empty = tensor.empty(): tensor<1x128x1x256x1xf32>
%transposed = linalg.transpose ins(%arg0 : tensor<1x256x1x128x1xf32>)
outs(%empty : tensor<1x128x1x256x1xf32>) permutation = [0, 3, 2, 1, 4]
%slice = tensor.extract_slice %transposed[0, 0, 0, 0, 0] [1, 128, 1, 32, 1] [1, 1, 1, 1, 1]
: tensor<1x128x1x256x1xf32> to tensor<128x32xf32>
- return %slice : tensor<128x32xf32>
+ util.return %slice : tensor<128x32xf32>
}
-// CHECK-LABEL: func @propagate_through_rank_reduced_extract_slice
+// CHECK-LABEL: util.func public @propagate_through_rank_reduced_extract_slice
// CHECK: %[[SLICE:.+]] = tensor.extract_slice
// CHECK-SAME: [0, 0, 0, 0, 0] [1, 32, 1, 128, 1] [1, 1, 1, 1, 1]
// CHECK-SAME: tensor<1x256x1x128x1xf32> to tensor<32x128xf32>
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[SLICE]] : tensor<32x128xf32>)
// CHECK-SAME: permutation = [1, 0]
-// CHECK: return %[[TRANSPOSE]]
+// CHECK: util.return %[[TRANSPOSE]]
// -----
-func.func @rank_reduced_extract_transposed_unit_dim(%arg0: tensor<256x1x32x128xf32>, %arg1: tensor<1x32x256x128xf32>) -> tensor<32x64x128xf32> {
- %transposed = linalg.transpose ins(%arg0 : tensor<256x1x32x128xf32>) outs(%arg1 : tensor<1x32x256x128xf32>) permutation = [1, 2, 0, 3]
+util.func public @rank_reduced_extract_transposed_unit_dim(%arg0: tensor<256x1x32x128xf32>, %arg1: tensor<1x32x256x128xf32>) -> tensor<32x64x128xf32> {
+ %transposed = linalg.transpose ins(%arg0 : tensor<256x1x32x128xf32>) outs(%arg1 : tensor<1x32x256x128xf32>) permutation = [1, 2, 0, 3]
%extracted_slice = tensor.extract_slice %transposed[0, 0, 0, 0] [1, 32, 64, 128] [1, 1, 1, 1] : tensor<1x32x256x128xf32> to tensor<32x64x128xf32>
- return %extracted_slice : tensor<32x64x128xf32>
+ util.return %extracted_slice : tensor<32x64x128xf32>
}
-// SINK-LABEL: func @rank_reduced_extract_transposed_unit_dim
+// SINK-LABEL: util.func public @rank_reduced_extract_transposed_unit_dim
// SINK: %[[EXT:.+]] = tensor.extract_slice
// SINK-SAME: tensor<256x1x32x128xf32> to tensor<64x32x128xf32>
// SINK: %[[RES:.+]] = linalg.transpose ins(%[[EXT]] : tensor<64x32x128xf32>
// SINK-SAME: outs({{.*}} : tensor<32x64x128xf32>)
// SINK-SAME: permutation = [1, 0, 2]
-// SINK: return %[[RES]] : tensor<32x64x128xf32>
+// SINK: util.return %[[RES]] : tensor<32x64x128xf32>
// -----
-func.func @propagate_to_matmul_ops(%lhs: tensor<16x16xf32>,
+util.func public @propagate_to_matmul_ops(%lhs: tensor<16x16xf32>,
%transposed_a: tensor<16x16xf32>,
%transposed_b: tensor<16x16xf32>) -> tensor<16x16xf32> {
%empty = tensor.empty(): tensor<16x16xf32>
@@ -104,16 +104,16 @@
outs(%empty : tensor<16x16xf32>) permutation = [1, 0]
%second_mm = linalg.matmul ins(%second_lhs, %first_mm : tensor<16x16xf32>, tensor<16x16xf32>)
outs(%empty : tensor<16x16xf32>) -> tensor<16x16xf32>
- return %second_mm : tensor<16x16xf32>
+ util.return %second_mm : tensor<16x16xf32>
}
-// CHECK-LABEL: func @propagate_to_matmul_ops
+// CHECK-LABEL: util.func public @propagate_to_matmul_ops
// CHECK: linalg.matmul_transpose_b
// CHECK: %[[SECOND_MM:.+]] = linalg.matmul_transpose_a
-// CHECK: return %[[SECOND_MM]]
+// CHECK: util.return %[[SECOND_MM]]
// -----
-func.func @propagate_to_transposed_matmul_ops(%lhs: tensor<16x16xf32>,
+util.func public @propagate_to_transposed_matmul_ops(%lhs: tensor<16x16xf32>,
%second_lhs: tensor<16x16xf32>,
%rhs: tensor<16x16xf32>) -> tensor<16x16xf32> {
%empty = tensor.empty(): tensor<16x16xf32>
@@ -126,16 +126,16 @@
outs(%empty : tensor<16x16xf32>) permutation = [1, 0]
%second_mm = linalg.matmul_transpose_a ins(%transpose_a, %first_mm : tensor<16x16xf32>, tensor<16x16xf32>)
outs(%empty : tensor<16x16xf32>) -> tensor<16x16xf32>
- return %second_mm : tensor<16x16xf32>
+ util.return %second_mm : tensor<16x16xf32>
}
-// CHECK-LABEL: func @propagate_to_transposed_matmul_ops
+// CHECK-LABEL: util.func public @propagate_to_transposed_matmul_ops
// CHECK: linalg.matmul ins
// CHECK: %[[SECOND_MM:.+]] = linalg.matmul ins
-// CHECK: return %[[SECOND_MM]]
+// CHECK: util.return %[[SECOND_MM]]
// -----
-func.func @propagate_to_bmm_ops(%lhs: tensor<2x16x16xf32>,
+util.func public @propagate_to_bmm_ops(%lhs: tensor<2x16x16xf32>,
%transposed_a: tensor<2x16x16xf32>,
%transposed_b: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%empty = tensor.empty(): tensor<2x16x16xf32>
@@ -148,16 +148,16 @@
outs(%empty : tensor<2x16x16xf32>) permutation = [0, 2, 1]
%second_bmm = linalg.batch_matmul ins(%second_lhs, %first_bmm : tensor<2x16x16xf32>, tensor<2x16x16xf32>)
outs(%empty : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
- return %second_bmm : tensor<2x16x16xf32>
+ util.return %second_bmm : tensor<2x16x16xf32>
}
-// CHECK-LABEL: func @propagate_to_bmm_ops
+// CHECK-LABEL: util.func public @propagate_to_bmm_ops
// CHECK: linalg.batch_matmul_transpose_b
// CHECK: %[[SECOND_MM:.+]] = linalg.batch_matmul_transpose_a
-// CHECK: return %[[SECOND_MM]]
+// CHECK: util.return %[[SECOND_MM]]
// -----
-func.func @propagate_to_transposed_bmm_ops(%lhs: tensor<2x16x16xf32>,
+util.func public @propagate_to_transposed_bmm_ops(%lhs: tensor<2x16x16xf32>,
%second_lhs: tensor<2x16x16xf32>,
%rhs: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%empty = tensor.empty(): tensor<2x16x16xf32>
@@ -170,16 +170,16 @@
outs(%empty : tensor<2x16x16xf32>) permutation = [0, 2, 1]
%second_bmm = linalg.batch_matmul_transpose_a ins(%transpose_a, %first_bmm : tensor<2x16x16xf32>, tensor<2x16x16xf32>)
outs(%empty : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
- return %second_bmm : tensor<2x16x16xf32>
+ util.return %second_bmm : tensor<2x16x16xf32>
}
-// CHECK-LABEL: func @propagate_to_transposed_bmm_ops
+// CHECK-LABEL: util.func public @propagate_to_transposed_bmm_ops
// CHECK: linalg.batch_matmul ins
// CHECK: %[[SECOND_MM:.+]] = linalg.batch_matmul ins
-// CHECK: return %[[SECOND_MM]]
+// CHECK: util.return %[[SECOND_MM]]
// -----
-func.func @do_not_propagate_to_matmul_in_dispatch(%lhs: tensor<16x16xf32>,
+util.func public @do_not_propagate_to_matmul_in_dispatch(%lhs: tensor<16x16xf32>,
%transposed_b: tensor<16x16xf32>) -> tensor<16x16xf32> {
%empty = tensor.empty(): tensor<16x16xf32>
%rhs = linalg.transpose ins(%transposed_b : tensor<16x16xf32>)
@@ -189,45 +189,45 @@
outs(%empty : tensor<16x16xf32>) -> tensor<16x16xf32>
flow.return %mm : tensor<16x16xf32>
}
- return %dispatch : tensor<16x16xf32>
+ util.return %dispatch : tensor<16x16xf32>
}
-// CHECK-LABEL: func @do_not_propagate_to_matmul_in_dispatch
+// CHECK-LABEL: util.func public @do_not_propagate_to_matmul_in_dispatch
// CHECK: linalg.transpose
// CHECK: %[[DISPATCH:.+]] = flow.dispatch.region
// CHECK: linalg.matmul ins
-// CHECK: return %[[DISPATCH]]
+// CHECK: util.return %[[DISPATCH]]
// -----
-func.func @propagate_to_bmm_transpose_batch(%transposed_lhs: tensor<16x2x16xf32>,
+util.func public @propagate_to_bmm_transpose_batch(%transposed_lhs: tensor<16x2x16xf32>,
%rhs: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%empty = tensor.empty(): tensor<2x16x16xf32>
%lhs = linalg.transpose ins(%transposed_lhs : tensor<16x2x16xf32>)
outs(%empty : tensor<2x16x16xf32>) permutation = [1, 0, 2]
%bmm = linalg.batch_matmul ins(%lhs, %rhs : tensor<2x16x16xf32>, tensor<2x16x16xf32>)
outs(%empty : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
- return %bmm : tensor<2x16x16xf32>
+ util.return %bmm : tensor<2x16x16xf32>
}
// Verify that without aggressive propagation, this stays as a batch matmul
-// CHECK-LABEL: func @propagate_to_bmm_transpose_batch
+// CHECK-LABEL: util.func public @propagate_to_bmm_transpose_batch
// CHECK: linalg.batch_matmul
// APROP: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
// APROP: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// APROP: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// APROP-LABEL: func @propagate_to_bmm_transpose_batch
+// APROP-LABEL: util.func public @propagate_to_bmm_transpose_batch
// APROP-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<16x2x16xf32>
// APROP-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<2x16x16xf32>
// APROP: %[[GENERIC:.+]] = linalg.generic
// APROP-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// APROP-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
// APROP-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<16x2x16xf32>, tensor<2x16x16xf32>
-// APROP: return %[[GENERIC]] : tensor<2x16x16xf32>
+// APROP: util.return %[[GENERIC]] : tensor<2x16x16xf32>
// -----
-func.func @sink_through_expand_shape(%arg0 : tensor<?x?x?xf32>) -> tensor<32x?x16x?x?xf32> {
+util.func public @sink_through_expand_shape(%arg0 : tensor<?x?x?xf32>) -> tensor<32x?x16x?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -238,29 +238,29 @@
%transposed = linalg.transpose ins(%arg0 : tensor<?x?x?xf32>)
outs(%empty : tensor<?x?x?xf32>) permutation = [1, 0, 2]
%expanded = tensor.expand_shape %transposed [[0, 1], [2, 3], [4]] : tensor<?x?x?xf32> into tensor<32x?x16x?x?xf32>
- return %expanded : tensor<32x?x16x?x?xf32>
+ util.return %expanded : tensor<32x?x16x?x?xf32>
}
-// SINK-LABEL: func @sink_through_expand_shape
+// SINK-LABEL: util.func public @sink_through_expand_shape
// SINK: %[[EXP:.+]] = tensor.expand_shape {{.*}} {{\[\[}}0, 1], [2, 3], [4]]
// SINK-SAME: tensor<?x?x?xf32> into tensor<16x?x32x?x?xf32>
// SINK: %[[RES:.+]] = linalg.transpose ins(%[[EXP]] : tensor<16x?x32x?x?xf32>
// SINK-SAME: outs({{.*}} : tensor<32x?x16x?x?xf32>)
// SINK-SAME: permutation = [2, 3, 0, 1, 4]
-// SINK: return %[[RES]] : tensor<32x?x16x?x?xf32>
+// SINK: util.return %[[RES]] : tensor<32x?x16x?x?xf32>
// -----
-func.func @sink_non_involution_through_expand_shape(%arg0 : tensor<2x3x4xf32>) -> tensor<1x3x4x2xf32> {
+util.func public @sink_non_involution_through_expand_shape(%arg0 : tensor<2x3x4xf32>) -> tensor<1x3x4x2xf32> {
%empty = tensor.empty(): tensor<3x4x2xf32>
%transposed = linalg.transpose ins(%arg0 : tensor<2x3x4xf32>)
outs(%empty : tensor<3x4x2xf32>) permutation = [1, 2, 0]
%expanded = tensor.expand_shape %transposed [[0, 1], [2], [3]] : tensor<3x4x2xf32> into tensor<1x3x4x2xf32>
- return %expanded : tensor<1x3x4x2xf32>
+ util.return %expanded : tensor<1x3x4x2xf32>
}
-// SINK-LABEL: func @sink_non_involution_through_expand_shape
+// SINK-LABEL: util.func public @sink_non_involution_through_expand_shape
// SINK: %[[EXP:.+]] = tensor.expand_shape {{.*}} {{\[\[}}0], [1, 2], [3]]
// SINK-SAME: tensor<2x3x4xf32> into tensor<2x1x3x4xf32>
// SINK: %[[RES:.+]] = linalg.transpose ins(%[[EXP]] : tensor<2x1x3x4xf32>
// SINK-SAME: outs({{.*}} : tensor<1x3x4x2xf32>)
// SINK-SAME: permutation = [1, 2, 3, 0]
-// SINK: return %[[RES]] : tensor<1x3x4x2xf32>
+// SINK: util.return %[[RES]] : tensor<1x3x4x2xf32>
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/raise_special_ops.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/raise_special_ops.mlir
index 48f9ddc..132a08b 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/raise_special_ops.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/raise_special_ops.mlir
@@ -4,9 +4,9 @@
// CHECK-SAME: %[[ARG:.+]]: tensor<?x?x?xf32>
// CHECK: %[[E:.+]] = tensor.empty(%{{.*}}, %{{.*}}, %{{.*}}) : tensor<?x?x?xf32>
// CHECK: %[[S:.+]] = linalg.softmax dimension(2) ins(%[[ARG]] : tensor<?x?x?xf32>) outs(%[[E]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-// CHECK: return %[[S]] : tensor<?x?x?xf32>
+// CHECK: util.return %[[S]] : tensor<?x?x?xf32>
-func.func @softmax(%src : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
+util.func public @softmax(%src : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant -3.40282347E+38 : f32
@@ -50,15 +50,17 @@
%11 = arith.mulf %arg0, %arg1 : f32
linalg.yield %11 : f32
} -> tensor<?x?x?xf32>
- return %10 : tensor<?x?x?xf32>
+ util.return %10 : tensor<?x?x?xf32>
}
+// -----
+
// CHECK-LABEL: @softmax_no_rcp
// CHECK-SAME: %[[ARG:.+]]: tensor<10x4096x4096xf16>
// CHECK: %[[E:.+]] = tensor.empty() : tensor<10x4096x4096xf16>
// CHECK: %[[S:.+]] = linalg.softmax dimension(2) ins(%[[ARG]] : tensor<10x4096x4096xf16>) outs(%[[E]] : tensor<10x4096x4096xf16>) -> tensor<10x4096x4096xf16>
-// CHECK: return %[[S]] : tensor<10x4096x4096xf16>
-func.func @softmax_no_rcp(%src : tensor<10x4096x4096xf16>) -> (tensor<10x4096x4096xf16>) {
+// CHECK: util.return %[[S]] : tensor<10x4096x4096xf16>
+util.func public @softmax_no_rcp(%src : tensor<10x4096x4096xf16>) -> (tensor<10x4096x4096xf16>) {
%cst_158 = arith.constant -6.550400e+04 : f16
%cst_121 = arith.constant 0.000000e+00 : f16
%224 = tensor.empty() : tensor<10x4096xf16>
@@ -106,16 +108,17 @@
%5290 = arith.divf %in, %in_1572 : f16
linalg.yield %5290 : f16
} -> tensor<10x4096x4096xf16>
- return %232 : tensor<10x4096x4096xf16>
+ util.return %232 : tensor<10x4096x4096xf16>
}
+// -----
// CHECK-LABEL: @softmax_broadcast
// CHECK-SAME: %[[ARG:.+]]: tensor<12x128x128xf32>
// CHECK: %[[E:.+]] = tensor.empty() : tensor<12x128x128xf32>
// CHECK: %[[S:.+]] = linalg.softmax dimension(2) ins(%[[ARG]] : tensor<12x128x128xf32>) outs(%[[E]] : tensor<12x128x128xf32>) -> tensor<12x128x128xf32>
-// CHECK: return %[[S]] : tensor<12x128x128xf32>
-func.func @softmax_broadcast(%93 : tensor<12x128x128xf32>) -> (tensor<12x128x128xf32>) {
+// CHECK: util.return %[[S]] : tensor<12x128x128xf32>
+util.func public @softmax_broadcast(%93 : tensor<12x128x128xf32>) -> (tensor<12x128x128xf32>) {
%cst_16 = arith.constant 0xFF800000 : f32
%cst_18 = arith.constant -0.000000e+00 : f32
%94 = tensor.empty() : tensor<12x128xf32>
@@ -160,10 +163,12 @@
%2460 = arith.divf %in, %in_261 : f32
linalg.yield %2460 : f32
} -> tensor<12x128x128xf32>
- return %109 : tensor<12x128x128xf32>
+ util.return %109 : tensor<12x128x128xf32>
}
-func.func @aTransposeBMatmul(%arg0 : tensor<10x20xf32>,
+// -----
+
+util.func public @aTransposeBMatmul(%arg0 : tensor<10x20xf32>,
%arg1 : tensor<40x20xf32>) -> tensor<10x40xf32> {
%0 = tensor.empty() : tensor<20x40xf32>
%1 = linalg.generic {
@@ -178,16 +183,18 @@
%4 = linalg.fill ins(%3 : f32) outs(%2 : tensor<10x40xf32>) -> tensor<10x40xf32>
%5 = linalg.matmul ins(%arg0, %1 : tensor<10x20xf32>, tensor<20x40xf32>)
outs(%4 : tensor<10x40xf32>) -> tensor<10x40xf32>
- return %5 : tensor<10x40xf32>
+ util.return %5 : tensor<10x40xf32>
}
-// CHECK-LABEL: func @aTransposeBMatmul
+// CHECK-LABEL: util.func public @aTransposeBMatmul
// CHECK-SAME: %[[ARG0:.+]]: tensor<10x20xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<40x20xf32>
// CHECK: %[[RESULT:.+]] = linalg.matmul_transpose_b
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
-func.func @aTransposeBBatchMatmul(%arg0 : tensor<5x10x20xf32>,
+// -----
+
+util.func public @aTransposeBBatchMatmul(%arg0 : tensor<5x10x20xf32>,
%arg1 : tensor<5x40x20xf32>) -> tensor<5x10x40xf32> {
%0 = tensor.empty() : tensor<5x20x40xf32>
%1 = linalg.generic {
@@ -202,16 +209,18 @@
%4 = linalg.fill ins(%3 : f32) outs(%2 : tensor<5x10x40xf32>) -> tensor<5x10x40xf32>
%5 = linalg.batch_matmul ins(%arg0, %1 : tensor<5x10x20xf32>, tensor<5x20x40xf32>)
outs(%4 : tensor<5x10x40xf32>) -> tensor<5x10x40xf32>
- return %5 : tensor<5x10x40xf32>
+ util.return %5 : tensor<5x10x40xf32>
}
-// CHECK-LABEL: func @aTransposeBBatchMatmul
+// CHECK-LABEL: util.func public @aTransposeBBatchMatmul
// CHECK-SAME: %[[ARG0:.+]]: tensor<5x10x20xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<5x40x20xf32>
// CHECK: %[[RESULT:.+]] = linalg.batch_matmul_transpose_b
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
-func.func @generic_fill(%arg0: tensor<?x?xf32>) -> tensor<1x1x?x?xf32> {
+// -----
+
+util.func public @generic_fill(%arg0: tensor<?x?xf32>) -> tensor<1x1x?x?xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -225,9 +234,9 @@
^bb0(%out: f32):
linalg.yield %cst : f32
} -> tensor<1x1x?x?xf32>
- return %1 : tensor<1x1x?x?xf32>
+ util.return %1 : tensor<1x1x?x?xf32>
}
-// CHECK-LABEL: func @generic_fill
+// CHECK-LABEL: util.func public @generic_fill
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EMPTY:.+]] = tensor.empty
@@ -235,12 +244,12 @@
// CHECK: %[[RESULT:.+]] = linalg.fill
// CHECK-SAME: ins(%[[CST]] : f32)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<1x1x?x?xf32>)
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
#map = affine_map<(d0) -> (d0)>
-func.func @test_rank_reduce(%A : tensor<1x1x5120xf32>, %B : tensor<5120xf32>) -> tensor<5120xf32> {
+util.func public @test_rank_reduce(%A : tensor<1x1x5120xf32>, %B : tensor<5120xf32>) -> tensor<5120xf32> {
%c0 = arith.constant 0 : index
%0 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%B : tensor<5120xf32>) {
^bb0(%out: f32):
@@ -248,17 +257,17 @@
%extracted = tensor.extract %A[%c0, %c0, %12] : tensor<1x1x5120xf32>
linalg.yield %extracted : f32
} -> tensor<5120xf32>
- return %0 : tensor<5120xf32>
+ util.return %0 : tensor<5120xf32>
}
-// CHECK-LABEL: func @test_rank_reduce
+// CHECK-LABEL: util.func public @test_rank_reduce
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [1, 1, 5120] [1, 1, 1]
// CHECK-SAME: tensor<1x1x5120xf32> to tensor<5120xf32>
// -----
#map = affine_map<(d0, d1) -> (d0, d1)>
-func.func @test_slice_middle(%A : tensor<64x64x64xf32>, %B : tensor<64x64xf32>) -> tensor<64x64xf32> {
+util.func public @test_slice_middle(%A : tensor<64x64x64xf32>, %B : tensor<64x64xf32>) -> tensor<64x64xf32> {
%c0 = arith.constant 0 : index
%0 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%B : tensor<64x64xf32>) {
^bb0(%out: f32):
@@ -267,16 +276,16 @@
%extracted = tensor.extract %A[%i1, %c0, %i2] : tensor<64x64x64xf32>
linalg.yield %extracted : f32
} -> tensor<64x64xf32>
- return %0 : tensor<64x64xf32>
+ util.return %0 : tensor<64x64xf32>
}
-// CHECK-LABEL: func @test_slice_middle
+// CHECK-LABEL: util.func public @test_slice_middle
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [64, 1, 64] [1, 1, 1]
// CHECK-SAME: tensor<64x64x64xf32> to tensor<64x64xf32>
// -----
-func.func @test_trailing_elementwise(%arg0: tensor<180x320x1xf32>) -> tensor<320xf32> {
+util.func public @test_trailing_elementwise(%arg0: tensor<180x320x1xf32>) -> tensor<320xf32> {
%c0 = arith.constant 0 : index
%c179 = arith.constant 179 : index
%70 = tensor.empty() : tensor<320xf32>
@@ -286,10 +295,10 @@
%extracted = tensor.extract %arg0[%c0, %76, %c0] : tensor<180x320x1xf32>
linalg.yield %extracted : f32
} -> tensor<320xf32>
- return %71 : tensor<320xf32>
+ util.return %71 : tensor<320xf32>
}
-// CHECK-LABEL: func @test_trailing_elementwise
+// CHECK-LABEL: util.func public @test_trailing_elementwise
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [1, 320, 1] [1, 1, 1]
// CHECK-SAME: tensor<180x320x1xf32> to tensor<320xf32>
@@ -298,8 +307,8 @@
// This currently should not be raised as the operation does not remain
// elementwise after raising the tensor.extract to input.
#map = affine_map<(d0, d1) -> (d0, d1)>
-// CHECK-LABEL: func @test_non_slice
-func.func @test_non_slice(%A : tensor<128x128x128xf32>, %B : tensor<64x64xf32>) -> tensor<64x64xf32> {
+// CHECK-LABEL: util.func public @test_non_slice
+util.func public @test_non_slice(%A : tensor<128x128x128xf32>, %B : tensor<64x64xf32>) -> tensor<64x64xf32> {
%c0 = arith.constant 0 : index
// CHECK: linalg.generic
%0 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%B : tensor<64x64xf32>) {
@@ -309,13 +318,13 @@
%extracted = tensor.extract %A[%i1, %c0, %i2] : tensor<128x128x128xf32>
linalg.yield %extracted : f32
} -> tensor<64x64xf32>
- return %0 : tensor<64x64xf32>
+ util.return %0 : tensor<64x64xf32>
}
// -----
#map = affine_map<(d0, d1) -> (d0, d1)>
-func.func @test_slice_negate_cat_peephole(%arg0: tensor<1x32x1x128xf16>) -> tensor<1x32x1x128xf16> {
+util.func public @test_slice_negate_cat_peephole(%arg0: tensor<1x32x1x128xf16>) -> tensor<1x32x1x128xf16> {
%1 = tensor.empty() : tensor<1x32x1x128xf16>
%2 = tensor.empty() : tensor<32x64xf16>
%extracted_slice = tensor.extract_slice %arg0[0, 0, 0, 0] [1, 32, 1, 64] [1, 1, 1, 1] : tensor<1x32x1x128xf16> to tensor<32x64xf16>
@@ -327,10 +336,10 @@
} -> tensor<32x64xf16>
%inserted_slice = tensor.insert_slice %3 into %1[0, 0, 0, 0] [1, 32, 1, 64] [1, 1, 1, 1] : tensor<32x64xf16> into tensor<1x32x1x128xf16>
%inserted_slice_1 = tensor.insert_slice %extracted_slice into %inserted_slice[0, 0, 0, 64] [1, 32, 1, 64] [1, 1, 1, 1] : tensor<32x64xf16> into tensor<1x32x1x128xf16>
- return %inserted_slice_1 : tensor<1x32x1x128xf16>
+ util.return %inserted_slice_1 : tensor<1x32x1x128xf16>
}
-// CHECK-LABEL: func.func @test_slice_negate_cat_peephole
+// CHECK-LABEL: util.func public @test_slice_negate_cat_peephole
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x32x1x128xf16>
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[EXPIN:.+]] = tensor.expand_shape %[[ARG0]] {{\[\[}}0], [1], [2], [3, 4]] : tensor<1x32x1x128xf16> into tensor<1x32x1x2x64xf16>
@@ -350,12 +359,12 @@
// CHECK: linalg.yield %[[SEL]] : f16
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[NREV]] {{\[\[}}0], [1], [2], [3, 4]] : tensor<1x32x1x2x64xf16> into tensor<1x32x1x128xf16>
-// CHECK: return %[[COLLAPSE]]
+// CHECK: util.return %[[COLLAPSE]]
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @test_slice_negate_cat_peephole_dynamic(%arg0: tensor<1x32x?x128xf16>) -> tensor<1x32x?x128xf16> {
+util.func public @test_slice_negate_cat_peephole_dynamic(%arg0: tensor<1x32x?x128xf16>) -> tensor<1x32x?x128xf16> {
%c2 = arith.constant 2 : index
%d2 = tensor.dim %arg0, %c2 : tensor<1x32x?x128xf16>
%1 = tensor.empty(%d2) : tensor<1x32x?x128xf16>
@@ -369,21 +378,21 @@
} -> tensor<32x?x64xf16>
%inserted_slice = tensor.insert_slice %3 into %1[0, 0, 0, 0] [1, 32, %d2, 64] [1, 1, 1, 1] : tensor<32x?x64xf16> into tensor<1x32x?x128xf16>
%inserted_slice_1 = tensor.insert_slice %extracted_slice into %inserted_slice[0, 0, 0, 64] [1, 32, %d2, 64] [1, 1, 1, 1] : tensor<32x?x64xf16> into tensor<1x32x?x128xf16>
- return %inserted_slice_1 : tensor<1x32x?x128xf16>
+ util.return %inserted_slice_1 : tensor<1x32x?x128xf16>
}
/// Verify that the pattern kicks in for a simple dynamic example.
-// CHECK-LABEL: func.func @test_slice_negate_cat_peephole_dynamic
+// CHECK-LABEL: util.func public @test_slice_negate_cat_peephole_dynamic
// CHECK: tensor.expand_shape
// CHECK: linalg.generic
// CHECK: tensor.extract
// CHECK: %[[COL:.+]] = tensor.collapse_shape
-// CHECK: return %[[COL]]
+// CHECK: util.return %[[COL]]
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @test_slice_negate_cat_peephole_dynamic(%arg0: tensor<32x?x128xf16>) -> tensor<32x?x128xf16> {
+util.func public @test_slice_negate_cat_peephole_dynamic(%arg0: tensor<32x?x128xf16>) -> tensor<32x?x128xf16> {
%c2 = arith.constant 2 : index
%d2 = tensor.dim %arg0, %c2 : tensor<32x?x128xf16>
%1 = tensor.empty(%d2) : tensor<32x?x128xf16>
@@ -396,13 +405,13 @@
linalg.yield %5 : f16
} -> tensor<32x?x64xf16>
%concat = tensor.concat dim(2) %3, %extracted_slice : (tensor<32x?x64xf16>, tensor<32x?x64xf16>) -> tensor<32x?x128xf16>
- return %concat : tensor<32x?x128xf16>
+ util.return %concat : tensor<32x?x128xf16>
}
/// Verify that the pattern kicks in for tensor.concat as well.
-// CHECK-LABEL: func.func @test_slice_negate_cat_peephole_dynamic
+// CHECK-LABEL: util.func public @test_slice_negate_cat_peephole_dynamic
// CHECK: tensor.expand_shape
// CHECK: linalg.generic
// CHECK: tensor.extract
// CHECK: %[[COL:.+]] = tensor.collapse_shape
-// CHECK: return %[[COL]]
+// CHECK: util.return %[[COL]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
index 289f372..ca768a2 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/remove_zero_extent_tensors.mlir
@@ -1,38 +1,38 @@
// RUN: iree-opt --split-input-file --allow-unregistered-dialect \
-// RUN: --pass-pipeline="builtin.module(func.func(iree-global-opt-remove-zero-extent-tensors))" \
+// RUN: --pass-pipeline="builtin.module(util.func(iree-global-opt-remove-zero-extent-tensors))" \
// RUN: %s | FileCheck %s
-func.func @zero_sized_operands(%arg0 : tensor<?x0xf32>, %arg1 : index) -> tensor<?x?xf32> {
+util.func public @zero_sized_operands(%arg0 : tensor<?x0xf32>, %arg1 : index) -> tensor<?x?xf32> {
%0 = tensor.empty(%arg1): tensor<0x?xf32>
%1 = "some_op"(%arg0, %0) : (tensor<?x0xf32>, tensor<0x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func @zero_sized_operands
+// CHECK: util.func public @zero_sized_operands
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x0xf32>
// CHECK-SAME: %[[ARG1:.+]]: index
// CHECK: %[[EMPTY0:.+]] = tensor.empty(%[[ARG1]])
// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]]
// CHECK: %[[EMPTY1:.+]] = tensor.empty(%[[DIM]])
// CHECK: %[[RESULT:.+]] = "some_op"(%[[EMPTY1]], %[[EMPTY0]]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @zero_sized_tensor_insert(%arg0 : tensor<?x?xf32>, %arg1 : tensor<0x?xf32>,
+util.func public @zero_sized_tensor_insert(%arg0 : tensor<?x?xf32>, %arg1 : tensor<0x?xf32>,
%arg2 : index) -> tensor<?x?xf32> {
%1 = tensor.insert_slice %arg1 into %arg0[0, 0] [0, %arg2] [1, 1] : tensor<0x?xf32> into tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
+ util.return %1 : tensor<?x?xf32>
}
-// CHECK: func @zero_sized_tensor_insert(%[[ARG0:.+]]: tensor<?x?xf32>
-// CHECK: return %[[ARG0]]
+// CHECK: util.func public @zero_sized_tensor_insert(%[[ARG0:.+]]: tensor<?x?xf32>
+// CHECK: util.return %[[ARG0]]
// -----
-func.func @zero_sizes_tensor_insert_dest(%arg0 : tensor<0x?xf32>, %arg1 : index) -> tensor<0x?xf32> {
+util.func public @zero_sizes_tensor_insert_dest(%arg0 : tensor<0x?xf32>, %arg1 : index) -> tensor<0x?xf32> {
%0 = tensor.empty(%arg1) : tensor<0x?xf32>
%1 = tensor.insert_slice %arg0 into %0[0, 0] [0, %arg1] [1, 1] : tensor<0x?xf32> into tensor<0x?xf32>
- return %1 : tensor<0x?xf32>
+ util.return %1 : tensor<0x?xf32>
}
-// CHECK: func @zero_sizes_tensor_insert_dest(%[[ARG0:.+]]: tensor<0x?xf32>, %[[ARG1:.+]]: index)
+// CHECK: util.func public @zero_sizes_tensor_insert_dest(%[[ARG0:.+]]: tensor<0x?xf32>, %[[ARG1:.+]]: index)
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[ARG1]])
-// CHECK: return %[[EMPTY]]
+// CHECK: util.return %[[EMPTY]]
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
index 5ca9aac..2d95dcd 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/set_encoding.mlir
@@ -1,16 +1,16 @@
// RUN: iree-opt --iree-global-opt-set-encoding --cse --split-input-file %s | FileCheck %s
-func.func @matmul_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @matmul_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_f32f32f32(
+// CHECK: util.func public @matmul_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf32>
@@ -43,21 +43,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_f32f32f32_dynamic(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @matmul_f32f32f32_dynamic(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_f32f32f32_dynamic(
+// CHECK: util.func public @matmul_f32f32f32_dynamic(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -93,20 +93,20 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [{{.*}}] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_i8i8i32(%arg0 : tensor<100x250xi8>, %arg1 : tensor<250x500xi8>,
+util.func public @matmul_i8i8i32(%arg0 : tensor<100x250xi8>, %arg1 : tensor<250x500xi8>,
%arg2 : tensor<100x500xi32>) -> tensor<100x500xi32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xi8>, tensor<250x500xi8>)
outs(%arg2 : tensor<100x500xi32>) -> tensor<100x500xi32>
- return %0 : tensor<100x500xi32>
+ util.return %0 : tensor<100x500xi32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_i8i8i32(
+// CHECK: util.func public @matmul_i8i8i32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xi8>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xi8>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xi32>
@@ -130,20 +130,20 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_f16f16f32(%arg0 : tensor<100x250xf16>, %arg1 : tensor<250x500xf16>,
+util.func public @matmul_f16f16f32(%arg0 : tensor<100x250xf16>, %arg1 : tensor<250x500xf16>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xf16>, tensor<250x500xf16>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_f16f16f32(
+// CHECK: util.func public @matmul_f16f16f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf32>
@@ -167,20 +167,20 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_f16f16f16(%arg0 : tensor<100x250xf16>, %arg1 : tensor<250x500xf16>,
+util.func public @matmul_f16f16f16(%arg0 : tensor<100x250xf16>, %arg1 : tensor<250x500xf16>,
%arg2 : tensor<100x500xf16>) -> tensor<100x500xf16> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xf16>, tensor<250x500xf16>)
outs(%arg2 : tensor<100x500xf16>) -> tensor<100x500xf16>
- return %0 : tensor<100x500xf16>
+ util.return %0 : tensor<100x500xf16>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_f16f16f16(
+// CHECK: util.func public @matmul_f16f16f16(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf16>
@@ -204,20 +204,20 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_bf16bf16f32(%arg0 : tensor<100x250xbf16>, %arg1 : tensor<250x500xbf16>,
+util.func public @matmul_bf16bf16f32(%arg0 : tensor<100x250xbf16>, %arg1 : tensor<250x500xbf16>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xbf16>, tensor<250x500xbf16>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_bf16bf16f32(
+// CHECK: util.func public @matmul_bf16bf16f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xbf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xbf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf32>
@@ -241,20 +241,20 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_bf16bf16bf16(%arg0 : tensor<100x250xbf16>, %arg1 : tensor<250x500xbf16>,
+util.func public @matmul_bf16bf16bf16(%arg0 : tensor<100x250xbf16>, %arg1 : tensor<250x500xbf16>,
%arg2 : tensor<100x500xbf16>) -> tensor<100x500xbf16> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<100x250xbf16>, tensor<250x500xbf16>)
outs(%arg2 : tensor<100x500xbf16>) -> tensor<100x500xbf16>
- return %0 : tensor<100x500xbf16>
+ util.return %0 : tensor<100x500xbf16>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_bf16bf16bf16(
+// CHECK: util.func public @matmul_bf16bf16bf16(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xbf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xbf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xbf16>
@@ -278,21 +278,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_f32f32f32(%arg0 : tensor<64x100x250xf32>, %arg1 : tensor<64x250x500xf32>,
+util.func public @batch_matmul_f32f32f32(%arg0 : tensor<64x100x250xf32>, %arg1 : tensor<64x250x500xf32>,
%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xf32>, tensor<64x250x500xf32>)
outs(%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32>
- return %0 : tensor<64x100x500xf32>
+ util.return %0 : tensor<64x100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_f32f32f32(
+// CHECK: util.func public @batch_matmul_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xf32>
@@ -329,21 +329,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_f32f32f32_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>,
+util.func public @batch_matmul_f32f32f32_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>,
%arg2 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
outs(%arg2 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
- return %0 : tensor<?x?x?xf32>
+ util.return %0 : tensor<?x?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_f32f32f32_dynamic(
+// CHECK: util.func public @batch_matmul_f32f32f32_dynamic(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -386,21 +386,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [{{.*}}] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_f16f16f16(%arg0 : tensor<64x100x250xf16>, %arg1 : tensor<64x250x500xf16>,
+util.func public @batch_matmul_f16f16f16(%arg0 : tensor<64x100x250xf16>, %arg1 : tensor<64x250x500xf16>,
%arg2 : tensor<64x100x500xf16>) -> tensor<64x100x500xf16> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xf16>, tensor<64x250x500xf16>)
outs(%arg2 : tensor<64x100x500xf16>) -> tensor<64x100x500xf16>
- return %0 : tensor<64x100x500xf16>
+ util.return %0 : tensor<64x100x500xf16>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_f16f16f16(
+// CHECK: util.func public @batch_matmul_f16f16f16(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xf16>
@@ -437,21 +437,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_f16f16f32(%arg0 : tensor<64x100x250xf16>, %arg1 : tensor<64x250x500xf16>,
+util.func public @batch_matmul_f16f16f32(%arg0 : tensor<64x100x250xf16>, %arg1 : tensor<64x250x500xf16>,
%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xf16>, tensor<64x250x500xf16>)
outs(%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32>
- return %0 : tensor<64x100x500xf32>
+ util.return %0 : tensor<64x100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_f16f16f32(
+// CHECK: util.func public @batch_matmul_f16f16f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xf32>
@@ -488,21 +488,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_bf16bf16bf16(%arg0 : tensor<64x100x250xbf16>, %arg1 : tensor<64x250x500xbf16>,
+util.func public @batch_matmul_bf16bf16bf16(%arg0 : tensor<64x100x250xbf16>, %arg1 : tensor<64x250x500xbf16>,
%arg2 : tensor<64x100x500xbf16>) -> tensor<64x100x500xbf16> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xbf16>, tensor<64x250x500xbf16>)
outs(%arg2 : tensor<64x100x500xbf16>) -> tensor<64x100x500xbf16>
- return %0 : tensor<64x100x500xbf16>
+ util.return %0 : tensor<64x100x500xbf16>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_bf16bf16bf16(
+// CHECK: util.func public @batch_matmul_bf16bf16bf16(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xbf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xbf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xbf16>
@@ -539,21 +539,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_bf16bf16f32(%arg0 : tensor<64x100x250xbf16>, %arg1 : tensor<64x250x500xbf16>,
+util.func public @batch_matmul_bf16bf16f32(%arg0 : tensor<64x100x250xbf16>, %arg1 : tensor<64x250x500xbf16>,
%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xbf16>, tensor<64x250x500xbf16>)
outs(%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32>
- return %0 : tensor<64x100x500xf32>
+ util.return %0 : tensor<64x100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_bf16bf16f32(
+// CHECK: util.func public @batch_matmul_bf16bf16f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xbf16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xbf16>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xf32>
@@ -590,21 +590,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_i8i8i32(%arg0 : tensor<64x100x250xi8>, %arg1 : tensor<64x250x500xi8>,
+util.func public @batch_matmul_i8i8i32(%arg0 : tensor<64x100x250xi8>, %arg1 : tensor<64x250x500xi8>,
%arg2 : tensor<64x100x500xi32>) -> tensor<64x100x500xi32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x100x250xi8>, tensor<64x250x500xi8>)
outs(%arg2 : tensor<64x100x500xi32>) -> tensor<64x100x500xi32>
- return %0 : tensor<64x100x500xi32>
+ util.return %0 : tensor<64x100x500xi32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_i8i8i32(
+// CHECK: util.func public @batch_matmul_i8i8i32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<64x100x250xi8>
// CHECK-SAME: %[[ARG1:.+]]: tensor<64x250x500xi8>
// CHECK-SAME: %[[ARG2:.+]]: tensor<64x100x500xi32>
@@ -641,21 +641,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [64, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @vecmat_f32f32f32(%arg0 : tensor<250xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @vecmat_f32f32f32(%arg0 : tensor<250xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<500xf32>) -> tensor<500xf32> {
%0 = linalg.vecmat ins(%arg0, %arg1 : tensor<250xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<500xf32>) -> tensor<500xf32>
- return %0 : tensor<500xf32>
+ util.return %0 : tensor<500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
-// CHECK: func @vecmat_f32f32f32(
+// CHECK: util.func public @vecmat_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<500xf32>
@@ -685,21 +685,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[VECMAT]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0] [500] [1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matvec_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250xf32>,
+util.func public @matvec_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<250xf32>,
%arg2 : tensor<100xf32>) -> tensor<100xf32> {
%0 = linalg.matvec ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<250xf32>)
outs(%arg2 : tensor<100xf32>) -> tensor<100xf32>
- return %0 : tensor<100xf32>
+ util.return %0 : tensor<100xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
-// CHECK: func @matvec_f32f32f32(
+// CHECK: util.func public @matvec_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100xf32>
@@ -729,21 +729,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATVEC]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0] [100] [1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_vecmat_f32f32f32(%arg0 : tensor<3x250xf32>, %arg1 : tensor<3x250x500xf32>,
+util.func public @batch_vecmat_f32f32f32(%arg0 : tensor<3x250xf32>, %arg1 : tensor<3x250x500xf32>,
%arg2 : tensor<3x500xf32>) -> tensor<3x500xf32> {
%0 = linalg.batch_vecmat ins(%arg0, %arg1 : tensor<3x250xf32>, tensor<3x250x500xf32>)
outs(%arg2 : tensor<3x500xf32>) -> tensor<3x500xf32>
- return %0 : tensor<3x500xf32>
+ util.return %0 : tensor<3x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @batch_vecmat_f32f32f32(
+// CHECK: util.func public @batch_vecmat_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<3x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<3x250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<3x500xf32>
@@ -777,21 +777,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[VECMAT]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [3, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matvec_f32f32f32_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?xf32>,
+util.func public @batch_matvec_f32f32f32_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.batch_matvec ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ util.return %0 : tensor<?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @batch_matvec_f32f32f32_dynamic(
+// CHECK: util.func public @batch_matvec_f32f32f32_dynamic(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
@@ -830,28 +830,28 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATVEC]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [{{.*}}] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @fold_fill_with_set_encoding(%arg0 : index, %arg1 : index)
+util.func public @fold_fill_with_set_encoding(%arg0 : index, %arg1 : index)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>> {
%cst = arith.constant 0.0 : f32
%0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
%2 = iree_linalg_ext.set_encoding %1 : tensor<?x?xf32>
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
- return %2 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
+ util.return %2 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
}
-// CHECK: func @fold_fill_with_set_encoding(
+// CHECK: util.func public @fold_fill_with_set_encoding(
// CHECK: %[[EMPTY:.+]] = tensor.empty(%{{.+}}, %{{.+}}) : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>
// CHECK: %[[FILL:.+]] = linalg.fill
// CHECK-SAME: outs(%[[EMPTY]] : tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32]>>)
-// CHECK: return %[[FILL]]
+// CHECK: util.return %[[FILL]]
// -----
-func.func @fold_fill_with_tensor_pad(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index)
+util.func public @fold_fill_with_tensor_pad(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index)
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>> {
%cst = arith.constant 0.0 : f32
%0 = tensor.empty(%arg0, %arg1) : tensor<?x?xf32>
@@ -862,14 +862,14 @@
} : tensor<?x?xf32> to tensor<?x?xf32>
%3 = iree_linalg_ext.set_encoding %2 : tensor<?x?xf32>
-> tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
- return %3 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
+ util.return %3 : tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
}
-// CHECK: func @fold_fill_with_tensor_pad(
+// CHECK: util.func public @fold_fill_with_tensor_pad(
// CHECK: %[[EMPTY:.+]] = tensor.empty(
// CHECK-SAME: tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32]>>
// CHECK: %[[FILL:.+]] = linalg.fill
// CHECK-SAME: outs(%[[EMPTY]] :
-// CHECK: return %[[FILL]]
+// CHECK: util.return %[[FILL]]
// -----
@@ -882,7 +882,7 @@
translation_info = <CPUDefault>>
-func.func @preset_compilation_info(
+util.func public @preset_compilation_info(
%arg0 : tensor<?x?xf32>,
%arg1 : tensor<?x?xf32>,
%arg2 : tensor<?x?xf32>,
@@ -893,9 +893,9 @@
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = linalg.batch_matmul {compilation_info = #compilation1} ins(%arg3, %arg4 : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
outs(%arg5 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
- return %0, %1 : tensor<?x?xf32>, tensor<?x?x?xf32>
+ util.return %0, %1 : tensor<?x?xf32>, tensor<?x?x?xf32>
}
-// CHECK-LABEL: func.func @preset_compilation_info
+// CHECK-LABEL: util.func public @preset_compilation_info
// CHECK-NOT: set_encoding
// CHECK-NOT: unset_encoding
// CHECK: linalg.matmul
@@ -903,7 +903,7 @@
// -----
-func.func @batch_matmul_truncf_f16f16f32(%arg0 : tensor<64x100x250xf32>, %arg1 : tensor<64x250x500xf32>,
+util.func public @batch_matmul_truncf_f16f16f32(%arg0 : tensor<64x100x250xf32>, %arg1 : tensor<64x250x500xf32>,
%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32> {
%0 = tensor.empty() : tensor<64x250x500xf16>
%casted0 = arith.truncf %arg0 : tensor<64x100x250xf32> to tensor<64x100x250xf16>
@@ -918,10 +918,10 @@
} -> tensor<64x250x500xf16>
%1 = linalg.batch_matmul ins(%casted0, %casted1 : tensor<64x100x250xf16>, tensor<64x250x500xf16>)
outs(%arg2 : tensor<64x100x500xf32>) -> tensor<64x100x500xf32>
- return %1 : tensor<64x100x500xf32>
+ util.return %1 : tensor<64x100x500xf32>
}
-// CHECK: func @batch_matmul_truncf_f16f16f32(%[[ARG0:.+]]: tensor<64x100x250xf32>, %[[ARG1:.+]]: tensor<64x250x500xf32>
+// CHECK: util.func public @batch_matmul_truncf_f16f16f32(%[[ARG0:.+]]: tensor<64x100x250xf32>, %[[ARG1:.+]]: tensor<64x250x500xf32>
// CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor<64x250x500xf16>
// CHECK-DAG: arith.truncf %[[ARG0]] : tensor<64x100x250xf32> to tensor<64x100x250xf16>
// CHECK: linalg.generic
@@ -931,26 +931,26 @@
// -----
-func.func @matmul_casted_from_i1_f32f32f32(%arg0 : tensor<64x256xi1>,
+util.func public @matmul_casted_from_i1_f32f32f32(%arg0 : tensor<64x256xi1>,
%arg1 : tensor<256x128xf32>) -> tensor<64x128xf32> {
%cst = arith.constant 0.000000e+00 : f32
%casted = arith.uitofp %arg0 : tensor<64x256xi1> to tensor<64x256xf32>
%0 = tensor.empty() : tensor<64x128xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<64x128xf32>) -> tensor<64x128xf32>
%2 = linalg.matmul ins(%casted, %arg1 : tensor<64x256xf32>, tensor<256x128xf32>) outs(%1 : tensor<64x128xf32>) -> tensor<64x128xf32>
- return %2 : tensor<64x128xf32>
+ util.return %2 : tensor<64x128xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func.func @matmul_casted_from_i1_f32f32f32
+// CHECK: util.func public @matmul_casted_from_i1_f32f32f32
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<64x256xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32], original_type = tensor<256x128xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32], original_type = tensor<64x128xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// -----
-func.func @matmul_generic_casted_from_i1_f32f32f32(%arg0 : tensor<64x256xi1>,
+util.func public @matmul_generic_casted_from_i1_f32f32f32(%arg0 : tensor<64x256xi1>,
%arg1 : tensor<256x128xf32>) -> tensor<64x128xf32> {
%cst = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<64x256xf32>
@@ -966,28 +966,28 @@
%0 = tensor.empty() : tensor<64x128xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<64x128xf32>) -> tensor<64x128xf32>
%2 = linalg.matmul ins(%casted, %arg1 : tensor<64x256xf32>, tensor<256x128xf32>) outs(%1 : tensor<64x128xf32>) -> tensor<64x128xf32>
- return %2 : tensor<64x128xf32>
+ util.return %2 : tensor<64x128xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func.func @matmul_generic_casted_from_i1_f32f32f32
+// CHECK: util.func public @matmul_generic_casted_from_i1_f32f32f32
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], original_type = tensor<64x256xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32], original_type = tensor<256x128xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: set_encoding {{.+}} tensor<?x?xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32], original_type = tensor<64x128xf32>, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// -----
-func.func @matmul_f32f32f32_narrow_M(%arg0 : tensor<2x250xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @matmul_f32f32f32_narrow_M(%arg0 : tensor<2x250xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<2x500xf32>) -> tensor<2x500xf32> {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<2x250xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<2x500xf32>) -> tensor<2x500xf32>
- return %0 : tensor<2x500xf32>
+ util.return %0 : tensor<2x500xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_f32f32f32_narrow_M(
+// CHECK: util.func public @matmul_f32f32f32_narrow_M(
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<2x250xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], matmul_narrow_M = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<250x500xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32], matmul_narrow_M = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<2x500xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32], matmul_narrow_M = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
@@ -995,16 +995,16 @@
// -----
-func.func @batch_matmul_f32f32f32_narrow_MN(%arg0 : tensor<64x4x250xf32>, %arg1 : tensor<64x250x2xf32>,
+util.func public @batch_matmul_f32f32f32_narrow_MN(%arg0 : tensor<64x4x250xf32>, %arg1 : tensor<64x250x2xf32>,
%arg2 : tensor<64x4x2xf32>) -> tensor<64x4x2xf32> {
%0 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<64x4x250xf32>, tensor<64x250x2xf32>)
outs(%arg2 : tensor<64x4x2xf32>) -> tensor<64x4x2xf32>
- return %0 : tensor<64x4x2xf32>
+ util.return %0 : tensor<64x4x2xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_f32f32f32_narrow_MN(
+// CHECK: util.func public @batch_matmul_f32f32f32_narrow_MN(
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x250xf32, #iree_linalg_ext.encoding<role = LHS, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x250x2xf32, #iree_linalg_ext.encoding<role = RHS, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
// CHECK: iree_linalg_ext.upper_bound_tile_size tensor<64x4x2xf32, #iree_linalg_ext.encoding<role = RESULT, element_types = [f32, f32, f32], matmul_narrow_M = 4 : index, matmul_narrow_N = 2 : index, user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>>
@@ -1012,18 +1012,18 @@
// -----
-func.func @matmul_transpose_a_f32f32f32(%arg0 : tensor<250x100xf32>, %arg1 : tensor<250x500xf32>,
+util.func public @matmul_transpose_a_f32f32f32(%arg0 : tensor<250x100xf32>, %arg1 : tensor<250x500xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<250x100xf32>, tensor<250x500xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_transpose_a_f32f32f32(
+// CHECK: util.func public @matmul_transpose_a_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<250x100xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf32>
@@ -1056,21 +1056,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @matmul_transpose_b_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<500x250xf32>,
+util.func public @matmul_transpose_b_f32f32f32(%arg0 : tensor<100x250xf32>, %arg1 : tensor<500x250xf32>,
%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32> {
%0 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<100x250xf32>, tensor<500x250xf32>)
outs(%arg2 : tensor<100x500xf32>) -> tensor<100x500xf32>
- return %0 : tensor<100x500xf32>
+ util.return %0 : tensor<100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @matmul_transpose_b_f32f32f32(
+// CHECK: util.func public @matmul_transpose_b_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<100x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<500x250xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<100x500xf32>
@@ -1103,21 +1103,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [100, 500] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_transpose_a_f32f32f32(%arg0 : tensor<2x250x100xf32>, %arg1 : tensor<2x250x500xf32>,
+util.func public @batch_matmul_transpose_a_f32f32f32(%arg0 : tensor<2x250x100xf32>, %arg1 : tensor<2x250x500xf32>,
%arg2 : tensor<2x100x500xf32>) -> tensor<2x100x500xf32> {
%0 = linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : tensor<2x250x100xf32>, tensor<2x250x500xf32>)
outs(%arg2 : tensor<2x100x500xf32>) -> tensor<2x100x500xf32>
- return %0 : tensor<2x100x500xf32>
+ util.return %0 : tensor<2x100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_transpose_a_f32f32f32(
+// CHECK: util.func public @batch_matmul_transpose_a_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x250x100xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2x250x500xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<2x100x500xf32>
@@ -1154,21 +1154,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [2, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @batch_matmul_transpose_b_f32f32f32(%arg0 : tensor<2x100x250xf32>, %arg1 : tensor<2x500x250xf32>,
+util.func public @batch_matmul_transpose_b_f32f32f32(%arg0 : tensor<2x100x250xf32>, %arg1 : tensor<2x500x250xf32>,
%arg2 : tensor<2x100x500xf32>) -> tensor<2x100x500xf32> {
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : tensor<2x100x250xf32>, tensor<2x500x250xf32>)
outs(%arg2 : tensor<2x100x500xf32>) -> tensor<2x100x500xf32>
- return %0 : tensor<2x100x500xf32>
+ util.return %0 : tensor<2x100x500xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK: func @batch_matmul_transpose_b_f32f32f32(
+// CHECK: util.func public @batch_matmul_transpose_b_f32f32f32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<2x100x250xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<2x500x250xf32>
// CHECK-SAME: %[[ARG2:.+]]: tensor<2x100x500xf32>
@@ -1205,11 +1205,11 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[BATCH_MATMUL]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0, 0] [2, 100, 500] [1, 1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0 : tensor<32x128xi16>, %arg1 : tensor<4096x32x128xi4>,
+util.func public @generic_batch_vecmat_transposed_i16u4i32(%arg0 : tensor<32x128xi16>, %arg1 : tensor<4096x32x128xi4>,
%arg2 : tensor<4096x32xi32>) -> tensor<4096x32xi32> {
%0 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<32x128xi16>, tensor<4096x32x128xi4>) outs(%arg2 : tensor<4096x32xi32>) {
^bb0(%in: i16, %in_5: i4, %out: i32):
@@ -1219,14 +1219,14 @@
%25 = arith.addi %24, %out : i32
linalg.yield %25 : i32
} -> tensor<4096x32xi32>
- return %0 : tensor<4096x32xi32>
+ util.return %0 : tensor<4096x32xi32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK: func @generic_batch_vecmat_transposed_i16u4i32(
+// CHECK: util.func public @generic_batch_vecmat_transposed_i16u4i32(
// CHECK-SAME: %[[ARG0:.+]]: tensor<32x128xi16>
// CHECK-SAME: %[[ARG1:.+]]: tensor<4096x32x128xi4>
// CHECK-SAME: %[[ARG2:.+]]: tensor<4096x32xi32>
@@ -1262,21 +1262,21 @@
// CHECK-SAME: outs(%[[OUTS]] :
// CHECK: %[[RESULT_PADDED:.+]] = iree_linalg_ext.unset_encoding %[[GENERIC]]
// CHECK: %[[RESULT:.+]] = tensor.extract_slice %[[RESULT_PADDED]][0, 0] [4096, 32] [1, 1]
-// CHECK: return %[[RESULT]]
+// CHECK: util.return %[[RESULT]]
// -----
-func.func @dot(%arg0: tensor<1024xf32>, %arg1: tensor<1024xf32>) -> tensor<f32> {
+util.func public @dot(%arg0: tensor<1024xf32>, %arg1: tensor<1024xf32>) -> tensor<f32> {
%res = "stablehlo.dot"(%arg0, %arg1) : (tensor<1024xf32>, tensor<1024xf32>) -> tensor<f32>
- return %res : tensor<f32>
+ util.return %res : tensor<f32>
}
-// CHECK: func @dot(
+// CHECK: util.func public @dot(
// CHECK: stablehlo.dot %{{.*}}, %{{.*}} : (tensor<1024xf32>, tensor<1024xf32>) -> tensor<f32>
// -----
-func.func @multi_m_dim_generic(%arg0 : tensor<64x4x128xf32>, %arg1 : tensor<128x512xf32>,
+util.func public @multi_m_dim_generic(%arg0 : tensor<64x4x128xf32>, %arg1 : tensor<128x512xf32>,
%arg2 : tensor<64x4x512xf32>) -> tensor<64x4x512xf32> {
%4 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>,
@@ -1289,17 +1289,17 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<64x4x512xf32>
- return %4 : tensor<64x4x512xf32>
+ util.return %4 : tensor<64x4x512xf32>
}
-// CHECK: func @multi_m_dim_generic(
+// CHECK: util.func public @multi_m_dim_generic(
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<64x4x128xf32>, tensor<128x512xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<64x4x512xf32>)
// -----
-func.func @multi_n_dim_generic(%arg0 : tensor<256x128xf32>, %arg1 : tensor<128x64x8xf32>,
+util.func public @multi_n_dim_generic(%arg0 : tensor<256x128xf32>, %arg1 : tensor<128x64x8xf32>,
%arg2 : tensor<256x64x8xf32>) -> tensor<256x64x8xf32> {
%4 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d2)>,
@@ -1312,17 +1312,17 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<256x64x8xf32>
- return %4 : tensor<256x64x8xf32>
+ util.return %4 : tensor<256x64x8xf32>
}
-// CHECK: func @multi_n_dim_generic(
+// CHECK: util.func public @multi_n_dim_generic(
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<256x128xf32>, tensor<128x64x8xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<256x64x8xf32>)
// -----
-func.func @multi_k_dim_generic(%arg0 : tensor<256x64x2xf32>, %arg1 : tensor<64x2x512xf32>,
+util.func public @multi_k_dim_generic(%arg0 : tensor<256x64x2xf32>, %arg1 : tensor<64x2x512xf32>,
%arg2 : tensor<256x512xf32>) -> tensor<256x512xf32> {
%4 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>,
@@ -1335,17 +1335,17 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<256x512xf32>
- return %4 : tensor<256x512xf32>
+ util.return %4 : tensor<256x512xf32>
}
-// CHECK: func @multi_k_dim_generic(
+// CHECK: util.func public @multi_k_dim_generic(
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<256x64x2xf32>, tensor<64x2x512xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<256x512xf32>)
// -----
-func.func @multi_batch_dim_generic(%arg0 : tensor<4x8x256x128xf32>, %arg1 : tensor<4x8x128x512xf32>,
+util.func public @multi_batch_dim_generic(%arg0 : tensor<4x8x256x128xf32>, %arg1 : tensor<4x8x128x512xf32>,
%arg2 : tensor<4x8x256x512xf32>) -> tensor<4x8x256x512xf32> {
%4 = linalg.generic {
indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>,
@@ -1358,10 +1358,10 @@
%6 = arith.addf %5, %out : f32
linalg.yield %6 : f32
} -> tensor<4x8x256x512xf32>
- return %4 : tensor<4x8x256x512xf32>
+ util.return %4 : tensor<4x8x256x512xf32>
}
-// CHECK: func @multi_batch_dim_generic(
+// CHECK: util.func public @multi_batch_dim_generic(
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8x256x128xf32>, tensor<4x8x128x512xf32>)
-// CHECK-SAME: outs(%{{.*}} : tensor<4x8x256x512xf32>)
\ No newline at end of file
+// CHECK-SAME: outs(%{{.*}} : tensor<4x8x256x512xf32>)
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir
index 0482ddf..3581392 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir
@@ -1,21 +1,21 @@
// RUN: iree-opt --split-input-file --iree-global-optimization-transformation-pipeline %s | FileCheck %s
// CHECK-LABEL: @empty
-func.func @empty() {
- // CHECK-NEXT: return
- return
+util.func public @empty() {
+ // CHECK-NEXT: util.return
+ util.return
}
// -----
-func.func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
+util.func public @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
%0 = arith.addf %arg0, %arg0 : tensor<4xf32>
%1 = arith.subf %0, %arg0 : tensor<4xf32>
%2 = arith.mulf %1, %arg0 : tensor<4xf32>
- return %2 : tensor<4xf32>
+ util.return %2 : tensor<4xf32>
}
-// CHECK-LABEL: func.func @elementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+// CHECK-LABEL: util.func public @elementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
// CHECK: %{{.+}} = linalg.generic
// CHECK: %{{.+}} = arith.addf %{{.+}}, %{{.+}} : f32
// CHECK: %{{.+}} = linalg.generic
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/transpose_and_decompose_concat.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/transpose_and_decompose_concat.mlir
index 1841853..3a74b9f 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/transpose_and_decompose_concat.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/transpose_and_decompose_concat.mlir
@@ -1,10 +1,10 @@
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-global-opt-decompose-concat{enable-concat-transposition=true}, cse))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-global-opt-decompose-concat{enable-concat-transposition=true}, cse))" %s | FileCheck %s
-func.func @test_inner_dim_concat(%arg0: tensor<32x?x64xf16>, %arg1: tensor<32x?x64xf16>) -> tensor<32x?x128xf16> {
+util.func public @test_inner_dim_concat(%arg0: tensor<32x?x64xf16>, %arg1: tensor<32x?x64xf16>) -> tensor<32x?x128xf16> {
%concat = tensor.concat dim(2) %arg0, %arg1 : (tensor<32x?x64xf16>, tensor<32x?x64xf16>) -> tensor<32x?x128xf16>
- return %concat : tensor<32x?x128xf16>
+ util.return %concat : tensor<32x?x128xf16>
}
-// CHECK-LABEL: func.func @test_inner_dim_concat
+// CHECK-LABEL: util.func public @test_inner_dim_concat
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<32x?x64xf16>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<32x?x64xf16>
// CHECK: %[[T0:.+]] = linalg.transpose ins(%[[ARG0]] : tensor<32x?x64xf16>) {{.*}} permutation = [2, 0, 1]
@@ -12,17 +12,17 @@
// CHECK: %[[SLICE0:.+]] = tensor.insert_slice %[[T0]] {{.*}}[0, 0, 0] [64, 32, %{{.*}}] [1, 1, 1]
// CHECK: %[[SLICE1:.+]] = tensor.insert_slice %[[T1]] into %[[SLICE0]][64, 0, 0] [64, 32, %{{.*}}] [1, 1, 1]
// CHECK: %[[T2:.+]] = linalg.transpose ins(%[[SLICE1]] : tensor<128x32x?xf16>) {{.*}} permutation = [1, 2, 0]
-// CHECK: return %[[T2]] : tensor<32x?x128xf16>
+// CHECK: util.return %[[T2]] : tensor<32x?x128xf16>
// -----
-func.func @test_outer_dim_concat(%arg0: tensor<32x?x64xf16>, %arg1: tensor<32x?x64xf16>) -> tensor<64x?x64xf16> {
+util.func public @test_outer_dim_concat(%arg0: tensor<32x?x64xf16>, %arg1: tensor<32x?x64xf16>) -> tensor<64x?x64xf16> {
%concat = tensor.concat dim(0) %arg0, %arg1 : (tensor<32x?x64xf16>, tensor<32x?x64xf16>) -> tensor<64x?x64xf16>
- return %concat : tensor<64x?x64xf16>
+ util.return %concat : tensor<64x?x64xf16>
}
-// CHECK-LABEL: func.func @test_outer_dim_concat
+// CHECK-LABEL: util.func public @test_outer_dim_concat
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<32x?x64xf16>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<32x?x64xf16>
// CHECK: %[[SLICE0:.+]] = tensor.insert_slice %[[ARG0]] {{.*}}[0, 0, 0] [32, %{{.*}}, 64] [1, 1, 1]
// CHECK: %[[SLICE1:.+]] = tensor.insert_slice %[[ARG1]] into %[[SLICE0]][32, 0, 0] [32, %{{.*}}, 64] [1, 1, 1]
-// CHECK: return %[[SLICE1]] : tensor<64x?x64xf16>
+// CHECK: util.return %[[SLICE1]] : tensor<64x?x64xf16>
diff --git a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
index e288e3b..15c0765 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
@@ -27,14 +27,6 @@
namespace {
-// Allowlist of function attributes to retain when importing funcs.
-constexpr const char *kRetainedAttributes[] = {
- "iree.abi",
- "iree.reflection",
- "sym_visibility",
- "noinline",
-};
-
struct IREEImportPublicPass
: public IREEImportPublicBase<IREEImportPublicPass> {
void getDependentDialects(DialectRegistry ®istry) const override {
@@ -295,8 +287,10 @@
};
//===----------------------------------------------------------------------===//
+// Func dialect -> Util patterns
+//===----------------------------------------------------------------------===//
-class BuiltinFuncOpPattern : public OpConversionPattern<func::FuncOp> {
+class FuncFuncOpPattern : public OpConversionPattern<func::FuncOp> {
using OpConversionPattern<func::FuncOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(func::FuncOp srcOp, OpAdaptor adaptor,
@@ -320,26 +314,62 @@
return rewriter.notifyMatchFailure(srcOp, "results failed to convert");
}
+ // Build tied operands index mapping results back to operands.
+ SmallVector<int64_t> tiedOperands;
+ bool anyTiedOperands = false;
+ for (unsigned i = 0; i < srcFuncType.getNumResults(); ++i) {
+ auto tiedAttr =
+ srcOp.getResultAttrOfType<IntegerAttr>(i, "iree.abi.tied");
+ if (tiedAttr) {
+ tiedOperands.push_back(tiedAttr.getInt());
+ } else {
+ tiedOperands.push_back(-1);
+ }
+ }
+ auto tiedOperandsAttr = anyTiedOperands
+ ? rewriter.getIndexArrayAttr(tiedOperands)
+ : ArrayAttr{};
+
// Create new function with converted argument and result types.
// Note that attributes are dropped. Consider preserving some if needed.
auto newFuncType = mlir::FunctionType::get(
srcOp.getContext(), signatureConversion.getConvertedTypes(),
convertedResultTypes);
- auto newFuncOp = rewriter.create<func::FuncOp>(
- srcOp.getLoc(), srcOp.getName(), newFuncType);
+ auto newFuncOp = rewriter.create<IREE::Util::FuncOp>(
+ srcOp.getLoc(), srcOp.getName(), newFuncType, tiedOperandsAttr);
+ newFuncOp.setSymVisibilityAttr(srcOp.getSymVisibilityAttr());
rewriter.inlineRegionBefore(srcOp.getBody(), newFuncOp.getFunctionBody(),
newFuncOp.end());
- // Retain function attributes in the allowlist.
+ // Handle defacto attrs to specialized ones.
+ if (srcOp->hasAttr("noinline")) {
+ newFuncOp.setInliningPolicyAttr(
+ rewriter.getAttr<IREE::Util::InlineNeverAttr>());
+ }
+
+ // Allowlist of function attributes to retain when importing funcs.
+ constexpr const char *kRetainedAttributes[] = {
+ "iree.reflection",
+ "vm.fallback",
+ "vm.signature",
+ "vm.version",
+ };
auto retainedAttributes = ArrayRef<const char *>(
kRetainedAttributes,
sizeof(kRetainedAttributes) / sizeof(kRetainedAttributes[0]));
for (auto retainAttrName : retainedAttributes) {
StringRef attrName(retainAttrName);
Attribute attr = srcOp->getAttr(attrName);
- if (attr) {
+ if (attr)
newFuncOp->setAttr(attrName, attr);
- }
+ }
+
+ // Copy all arg/result attrs. We could filter these.
+ if (auto argAttrs = srcOp.getAllArgAttrs()) {
+ newFuncOp.setAllArgAttrs(argAttrs);
+ }
+ if (auto resultAttrs = srcOp.getAllResultAttrs()) {
+ newFuncOp.setAllResultAttrs(resultAttrs);
}
// Tell the rewriter to convert the region signature.
@@ -355,6 +385,40 @@
}
};
+class FuncCallOpPattern : public OpConversionPattern<func::CallOp> {
+ using OpConversionPattern<func::CallOp>::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(func::CallOp srcOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ SmallVector<Type, 1> resultTypes;
+ if (failed(getTypeConverter()->convertTypes(srcOp.getResultTypes(),
+ resultTypes))) {
+ return rewriter.notifyMatchFailure(srcOp, "results failed to convert");
+ }
+ auto tiedOperandsAttr =
+ srcOp->getAttrOfType<ArrayAttr>("iree.abi.tied_operands");
+ rewriter.replaceOpWithNewOp<IREE::Util::CallOp>(
+ srcOp, resultTypes, srcOp.getCallee(), adaptor.getOperands(),
+ tiedOperandsAttr);
+ return success();
+ }
+};
+
+class FuncReturnOpPattern : public OpConversionPattern<func::ReturnOp> {
+ using OpConversionPattern<func::ReturnOp>::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(func::ReturnOp srcOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ rewriter.replaceOpWithNewOp<IREE::Util::ReturnOp>(srcOp,
+ adaptor.getOperands());
+ return success();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Generic conversion
+//===----------------------------------------------------------------------===//
+
class GlobalOpPattern : public OpConversionPattern<IREE::Input::GlobalOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
@@ -466,37 +530,30 @@
}
return true;
};
-
- target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp funcOp) {
- for (Type type : funcOp.getFunctionType().getInputs()) {
- if (isIllegalType(type))
- return false;
- }
- for (Type type : funcOp.getFunctionType().getResults()) {
- if (isIllegalType(type))
- return false;
- }
- for (Block &block : funcOp.getFunctionBody()) {
- for (Type type : block.getArgumentTypes()) {
- if (isIllegalType(type))
- return false;
- }
- }
- return true;
- });
target.markUnknownOpDynamicallyLegal(isLegallyTypedOp);
IREETypeConverter typeConverter;
PatternBenefit specific_benefit = 100;
patterns.insert<GenericTypeConvert>(typeConverter, &getContext(), 0);
- patterns.insert<BuiltinFuncOpPattern>(typeConverter, &getContext(),
- specific_benefit);
patterns.insert<GlobalOpPattern>(typeConverter, &getContext(), 0);
patterns.insert<TensorExportPattern, TensorImportPattern>(
typeConverter, &getContext(), specific_benefit);
patterns.insert<ExecutableSourcePattern, ExecutableExportPattern>(
typeConverter, &getContext(), specific_benefit);
+ target.addDynamicallyLegalDialect<func::FuncDialect>(
+ [&](Operation *op) -> std::optional<bool> {
+ // Allow the func dialect within nested modules but not in the top-level
+ // one that represents the host program.
+ return op->getParentOfType<mlir::ModuleOp>() != getOperation();
+ });
+ patterns.insert<FuncFuncOpPattern>(typeConverter, &getContext(),
+ specific_benefit);
+ patterns.insert<FuncCallOpPattern>(typeConverter, &getContext(),
+ specific_benefit);
+ patterns.insert<FuncReturnOpPattern>(typeConverter, &getContext(),
+ specific_benefit);
+
#define ONE_TO_ONE(SrcOpTy, TargetOpTy) \
patterns.insert<OneToOneConverionPattern>( \
typeConverter, SrcOpTy::getOperationName(), \
diff --git a/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp b/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
index 8f680ad..d112f5e 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/ImportMLProgram.cpp
@@ -12,7 +12,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MLProgram/IR/MLProgram.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
@@ -30,7 +29,7 @@
struct ImportMLProgramPass : public ImportMLProgramBase<ImportMLProgramPass> {
void getDependentDialects(DialectRegistry ®istry) const override {
- registry.insert<IREE::Util::UtilDialect, func::FuncDialect>();
+ registry.insert<arith::ArithDialect, IREE::Util::UtilDialect>();
}
void runOnOperation() override;
};
@@ -171,11 +170,11 @@
FunctionType funcType =
rewriter.getFunctionType(/*input=*/TypeRange{}, /*outputs=*/newType);
ImplicitLocOpBuilder b(globalOp.getLoc(), rewriter);
- auto funcOp = b.create<func::FuncOp>(getterName, funcType);
+ auto funcOp = b.create<IREE::Util::FuncOp>(getterName, funcType);
funcOp.setPublic();
b.setInsertionPointToStart(funcOp.addEntryBlock());
auto val = globalOp.createLoadOp(globalOp.getLoc(), b);
- b.create<func::ReturnOp>(val.getLoadedGlobalValue());
+ b.create<IREE::Util::ReturnOp>(val.getLoadedGlobalValue());
}
if (!setterName.empty() && isMutable) {
@@ -183,11 +182,11 @@
FunctionType funcType =
rewriter.getFunctionType(/*input=*/newType, /*outputs=*/TypeRange{});
ImplicitLocOpBuilder b(globalOp.getLoc(), rewriter);
- auto funcOp = b.create<func::FuncOp>(setterName, funcType);
+ auto funcOp = b.create<IREE::Util::FuncOp>(setterName, funcType);
funcOp.setPublic();
b.setInsertionPointToStart(funcOp.addEntryBlock());
globalOp.createStoreOp(globalOp.getLoc(), funcOp.getArgument(0), b);
- b.create<func::ReturnOp>();
+ b.create<IREE::Util::ReturnOp>();
}
return success();
@@ -210,7 +209,8 @@
/*input=*/TypeRange{IREE::Util::ListType::get(
IREE::Util::VariantType::get(context))},
/*outputs=*/{});
- auto funcOp = b.create<func::FuncOp>("ireeMlProgramGlobalsInit", funcType);
+ auto funcOp =
+ b.create<IREE::Util::FuncOp>("ireeMlProgramGlobalsInit", funcType);
funcOp.setPublic();
b.setInsertionPointToStart(funcOp.addEntryBlock());
@@ -221,7 +221,7 @@
b.create<IREE::Util::GlobalStoreOp>(val, it.value().name);
}
- b.create<func::ReturnOp>();
+ b.create<IREE::Util::ReturnOp>();
return success();
}
diff --git a/compiler/src/iree/compiler/InputConversion/Common/test/import_ml_program.mlir b/compiler/src/iree/compiler/InputConversion/Common/test/import_ml_program.mlir
index 3084675..214bbf5 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/test/import_ml_program.mlir
+++ b/compiler/src/iree/compiler/InputConversion/Common/test/import_ml_program.mlir
@@ -5,12 +5,12 @@
ml_program.public_global_accessors = {
get = "global${0}$get", set = "global${0}$set"}} {
// CHECK: util.global private mutable @global_pubmut = 51 : i32
- // CHECK: func @global$global_pubmut$get() -> i32
- // CHECK: func @global$global_pubmut$set(%{{.*}}: i32)
+ // CHECK: util.func public @global$global_pubmut$get() -> i32
+ // CHECK: util.func public @global$global_pubmut$set(%{{.*}}: i32)
// CHECK-NOT: func
ml_program.global public mutable @global_pubmut(51 : i32) : i32
// CHECK: util.global private @global_pub = 52 : i32
- // CHECK: func @global$global_pub$get() -> i32
+ // CHECK: util.func public @global$global_pub$get() -> i32
// CHECK-NOT: func
ml_program.global public @global_pub(52 : i32) : i32
// CHECK: util.global private mutable @global_privmut = 53 : i32
@@ -25,8 +25,8 @@
builtin.module @globals attributes {
ml_program.public_global_accessors = {get = "global__{0}__get"}} {
// CHECK: util.global private mutable @global_pubmut = 51 : i32
- // CHECK: func @global__global_pubmut__get() -> i32
- // CHECK: func @global$global_pubmut$set
+ // CHECK: util.func public @global__global_pubmut__get() -> i32
+ // CHECK: util.func public @global$global_pubmut$set
ml_program.global public mutable @global_pubmut(51 : i32) : i32
}
@@ -34,8 +34,8 @@
// CHECK-LABEL: module @no_accessors_globals
builtin.module @no_accessors_globals {
// CHECK: util.global private mutable @global_pubmut = 51 : i32
- // CHECK: func @global$global_pubmut$get() -> i32
- // CHECK: func @global$global_pubmut$set(%{{.*}}: i32)
+ // CHECK: util.func public @global$global_pubmut$get() -> i32
+ // CHECK: util.func public @global$global_pubmut$set(%{{.*}}: i32)
ml_program.global public mutable @global_pubmut(51 : i32) : i32
}
@@ -43,10 +43,10 @@
// CHECK-LABEL: module @global_load
builtin.module @global_load {
ml_program.global private @v_loaded(dense<0> : tensor<4xi32>) : tensor<4xi32>
- func.func @loaded() {
+ util.func @loaded() {
// CHECK: util.global.load @v_loaded : tensor<4xi32>
%0 = ml_program.global_load @v_loaded : tensor<4xi32>
- return
+ util.return
}
}
@@ -54,10 +54,10 @@
// CHECK-LABEL: module @global_load_const
builtin.module @global_load_const {
ml_program.global private @v_loaded(dense<0> : tensor<4xi32>) : tensor<4xi32>
- func.func @loaded() {
+ util.func @loaded() {
// CHECK: util.global.load @v_loaded : tensor<4xi32>
%0 = ml_program.global_load_const @v_loaded : tensor<4xi32>
- return
+ util.return
}
}
@@ -65,12 +65,12 @@
// CHECK-LABEL: module @global_store
builtin.module @global_store {
ml_program.global private mutable @v_stored : tensor<4xi32>
- func.func @stored() {
+ util.func @stored() {
// CHECK: %[[CST:.*]] = arith.constant
%cst = arith.constant dense<5> : tensor<4xi32>
// CHECK: util.global.store %[[CST]], @v_stored : tensor<4xi32>
ml_program.global_store @v_stored = %cst : tensor<4xi32>
- return
+ util.return
}
}
@@ -88,7 +88,7 @@
// CHECK-DAG: util.global private mutable @global_privmut : i32
// CHECK-DAG: util.global private mutable @global_priv : i32
-// CHECK-LABEL: func.func @ireeMlProgramGlobalsInit(
+// CHECK-LABEL: util.func public @ireeMlProgramGlobalsInit(
// CHECK-SAME: %[[VAL_0:.*]]: !util.list<?>
// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_2:.*]] = util.list.get %[[VAL_0]]{{\[}}%[[VAL_1]]] : !util.list<?> -> i32
diff --git a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
index 31ad781..b7d746d 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
+++ b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
@@ -1,53 +1,67 @@
// RUN: iree-opt --split-input-file --iree-import-public %s | FileCheck %s
-// CHECK-LABEL: func.func @b_func
+// CHECK-LABEL: util.func private @private_func
+// CHECK: util.return
+func.func private @private_func() -> () {
+ return
+}
+
+// -----
+// CHECK-LABEL: util.func public @noinline_func
+// CHECK: inlining_policy = #util.inline.never
+func.func @noinline_func() -> () attributes {noinline} {
+ return
+}
+
+// -----
+// CHECK-LABEL: util.func public @b_func
// CHECK-SAME: (%arg0: !hal.buffer, %arg1: !hal.buffer) -> (!hal.buffer, !hal.buffer)
-// CHECK: return %arg0, %arg1 : !hal.buffer, !hal.buffer
+// CHECK: util.return %arg0, %arg1 : !hal.buffer, !hal.buffer
func.func @b_func(%arg0 : !iree_input.buffer, %arg1 : !iree_input.buffer) -> (!iree_input.buffer, !iree_input.buffer) {
return %arg0, %arg1 : !iree_input.buffer, !iree_input.buffer
}
// -----
-// CHECK-LABEL: func.func @bv_func
+// CHECK-LABEL: util.func public @bv_func
// CHECK-SAME: (%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> (!hal.buffer_view, !hal.buffer_view)
-// CHECK: return %arg0, %arg1 : !hal.buffer_view, !hal.buffer_view
+// CHECK: util.return %arg0, %arg1 : !hal.buffer_view, !hal.buffer_view
func.func @bv_func(%arg0 : !iree_input.buffer_view, %arg1 : !iree_input.buffer_view) -> (!iree_input.buffer_view, !iree_input.buffer_view) {
return %arg0, %arg1 : !iree_input.buffer_view, !iree_input.buffer_view
}
// -----
-// CHECK-LABEL: func.func @list_func
+// CHECK-LABEL: util.func public @list_func
// CHECK-SAME: (%arg0: !util.list<?>) -> !util.list<?>
func.func @list_func(%arg0 : !iree_input.list<!iree_input.variant>) -> !iree_input.list<!iree_input.variant> {
return %arg0 : !iree_input.list<!iree_input.variant>
}
// -----
-// CHECK-LABEL: func.func @list_func_retains_iree_abi
+// CHECK-LABEL: util.func public @list_func_retains_iree_attrs
// CHECK-SAME: (%arg0: !util.list<?>) -> !util.list<?>
-// CHECK-SAME: iree.abi = "FOOBAR"
-func.func @list_func_retains_iree_abi(%arg0 : !iree_input.list<!iree_input.variant>) -> !iree_input.list<!iree_input.variant>
- attributes {iree.abi = "FOOBAR"} {
+// CHECK-SAME: iree.reflection = {some.attr}
+func.func @list_func_retains_iree_attrs(%arg0 : !iree_input.list<!iree_input.variant>) -> !iree_input.list<!iree_input.variant>
+ attributes {iree.reflection = {some.attr}} {
return %arg0 : !iree_input.list<!iree_input.variant>
}
// -----
-// CHECK-LABEL: func.func @list_func_call
-// CHECK: call @list_func_call(%arg0) : (!util.list<?>) -> !util.list<?>
+// CHECK-LABEL: util.func public @list_func_call
+// CHECK: util.call @list_func_call(%arg0) : (!util.list<?>) -> !util.list<?>
func.func @list_func_call(%arg0 : !iree_input.list<!iree_input.variant>) -> !iree_input.list<!iree_input.variant> {
call @list_func_call(%arg0) : (!iree_input.list<!iree_input.variant>) -> !iree_input.list<!iree_input.variant>
return %arg0 : !iree_input.list<!iree_input.variant>
}
// -----
-// CHECK-LABEL: func.func @ptr_func
+// CHECK-LABEL: util.func public @ptr_func
// CHECK-SAME: (%arg0: !util.ptr<!hal.buffer_view>) -> !util.ptr<!hal.buffer_view>
func.func @ptr_func(%arg0 : !iree_input.ptr<!iree_input.buffer_view>) -> !iree_input.ptr<!iree_input.buffer_view> {
return %arg0 : !iree_input.ptr<!iree_input.buffer_view>
}
// -----
-// CHECK-LABEL: func.func @null_op
+// CHECK-LABEL: util.func public @null_op
// CHECK: util.null : !util.variant
func.func @null_op() -> !iree_input.variant {
%0 = iree_input.null : !iree_input.variant
@@ -55,7 +69,7 @@
}
//----
-// CHECK-LABEL: func.func @buffer_subspan
+// CHECK-LABEL: util.func public @buffer_subspan
// CHECK-SAME: (%arg0: !hal.buffer) -> !hal.buffer
// CHECK: %[[OFFSET:.+]] = arith.constant 100
// CHECK: %[[LENGTH:.+]] = arith.constant 200
@@ -70,7 +84,7 @@
}
//----
-// CHECK-LABEL: func.func @buffer_view_create
+// CHECK-LABEL: util.func public @buffer_view_create
// CHECK-SAME: (%arg0: !hal.buffer) -> !hal.buffer_view
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C2:.*]] = arith.constant 2 : index
@@ -99,7 +113,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_export
+// CHECK-LABEL: util.func public @tensor_export
// CHECK: hal.tensor.export %arg0 : tensor<?x?x3xf32>{%arg1, %arg2} -> !hal.buffer_view
func.func @tensor_export(%arg0 : tensor<?x?x3xf32>, %arg1 : index, %arg2 : index) -> !iree_input.buffer_view {
%0 = iree_input.tensor.export %arg0 : tensor<?x?x3xf32>{%arg1, %arg2} -> !iree_input.buffer_view
@@ -107,7 +121,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_export_static
+// CHECK-LABEL: util.func public @tensor_export_static
// CHECK: hal.tensor.export %arg0 : tensor<3xf32> -> !hal.buffer_view
func.func @tensor_export_static(%arg0 : tensor<3xf32>) -> !iree_input.buffer_view {
%0 = iree_input.tensor.export %arg0 : tensor<3xf32> -> !iree_input.buffer_view
@@ -115,7 +129,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_export_implicit_dims
+// CHECK-LABEL: util.func public @tensor_export_implicit_dims
// CHECK: %[[ZERO:.*]] = arith.constant 0
// CHECK: %[[D0:.*]] = tensor.dim %arg0, %[[ZERO]]
// CHECK: %[[ONE:.*]] = arith.constant 1
@@ -127,7 +141,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_import
+// CHECK-LABEL: util.func public @tensor_import
// CHECK: hal.tensor.import %arg0 : !hal.buffer_view -> tensor<?x?x3xf32>{%arg1, %arg2}
func.func @tensor_import(%arg0 : !iree_input.buffer_view, %arg1 : index, %arg2 : index) -> tensor<?x?x3xf32> {
%0 = iree_input.tensor.import %arg0 : !iree_input.buffer_view -> tensor<?x?x3xf32>{%arg1, %arg2}
@@ -135,7 +149,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_import_static
+// CHECK-LABEL: util.func public @tensor_import_static
// CHECK: hal.tensor.import %arg0 : !hal.buffer_view -> tensor<3xf32>
func.func @tensor_import_static(%arg0 : !iree_input.buffer_view) -> tensor<3xf32> {
%0 = iree_input.tensor.import %arg0 : !iree_input.buffer_view -> tensor<3xf32>
@@ -143,7 +157,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_import_implicit_dims
+// CHECK-LABEL: util.func public @tensor_import_implicit_dims
// CHECK: %[[D0:.*]] = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
// CHECK: %[[D1:.*]] = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
// CHECK: hal.tensor.import %arg0 : !hal.buffer_view -> tensor<?x?x3xf32>{%[[D0]], %[[D1]]}
@@ -153,7 +167,7 @@
}
// -----
-// CHECK-LABEL: func.func @buffer_view_rank
+// CHECK-LABEL: util.func public @buffer_view_rank
// CHECK: hal.buffer_view.rank<%arg0 : !hal.buffer_view> : index
func.func @buffer_view_rank(%arg0 : !iree_input.buffer_view) -> index {
%0 = iree_input.buffer_view.rank %arg0 : index
@@ -161,16 +175,16 @@
}
// -----
-// CHECK-LABEL: func.func @byte_buffer_constant
+// CHECK-LABEL: util.func public @byte_buffer_constant
// CHECK: %[[B:.*]] = util.buffer.constant "name" {alignment = 64 : index, mime_type = "text/plain"} : !util.buffer = "foo"
-// CHECK: return %[[B]] : !util.buffer
+// CHECK: util.return %[[B]] : !util.buffer
func.func @byte_buffer_constant() -> !iree_input.byte_buffer {
%0 = iree_input.byte_buffer.constant "name" {alignment = 64 : index, mime_type = "text/plain"} : !iree_input.byte_buffer = "foo"
return %0 : !iree_input.byte_buffer
}
// -----
-// CHECK-LABEL: func.func @buffer_view_dim
+// CHECK-LABEL: util.func public @buffer_view_dim
// CHECK: hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
func.func @buffer_view_dim(%arg0 : !iree_input.buffer_view) -> index {
%0 = iree_input.buffer_view.dim %arg0, 0 : index
@@ -178,7 +192,7 @@
}
// -----
-// CHECK-LABEL: func.func @list_create
+// CHECK-LABEL: util.func public @list_create
// CHECK: util.list.create %arg0 : !util.list<?>
func.func @list_create(%arg0 : index) -> !iree_input.list<!iree_input.variant> {
%0 = iree_input.list.create %arg0 : !iree_input.list<!iree_input.variant>
@@ -186,7 +200,7 @@
}
// -----
-// CHECK-LABEL: func.func @list_size
+// CHECK-LABEL: util.func public @list_size
// CHECK: util.list.size %arg0 : !util.list<?>
func.func @list_size(%arg0 : !iree_input.list<!iree_input.variant>) -> index {
%0 = iree_input.list.size %arg0 : !iree_input.list<!iree_input.variant>
@@ -194,7 +208,7 @@
}
// -----
-// CHECK-LABEL: func.func @list_resize
+// CHECK-LABEL: util.func public @list_resize
// CHECK: util.list.resize %arg0, %arg1 : !util.list<?>
func.func @list_resize(%arg0 : !iree_input.list<!iree_input.variant>, %arg1 : index) {
iree_input.list.resize %arg0, %arg1 : !iree_input.list<!iree_input.variant>
@@ -202,7 +216,7 @@
}
// -----
-// CHECK-LABEL: func.func @list_get
+// CHECK-LABEL: util.func public @list_get
// CHECK: util.list.get %arg0[%arg1] : !util.list<?>
func.func @list_get(%arg0 : !iree_input.list<!iree_input.variant>, %arg1 : index) -> !iree_input.variant {
%0 = iree_input.list.get %arg0[%arg1] : !iree_input.list<!iree_input.variant> -> !iree_input.variant
@@ -210,7 +224,7 @@
}
// -----
-// CHECK-LABEL: func.func @list_set
+// CHECK-LABEL: util.func public @list_set
// CHECK: util.list.set %arg0[%arg1], %arg2 : !util.list<?>
func.func @list_set(%arg0 : !iree_input.list<!iree_input.variant>, %arg1 : index, %arg2 : !iree_input.variant) {
iree_input.list.set %arg0[%arg1], %arg2 : !iree_input.list<!iree_input.variant>, !iree_input.variant
@@ -218,7 +232,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_reshape
+// CHECK-LABEL: util.func public @tensor_reshape
// CHECK: flow.tensor.reshape %arg0 : tensor<?x?xf32>{%arg1, %arg2} -> tensor<?x?xf32>{%arg2, %arg1}
func.func @tensor_reshape(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
%0 = iree_input.tensor.reshape %arg0 : tensor<?x?xf32>{%arg1, %arg2} -> tensor<?x?xf32>{%arg2, %arg1}
@@ -226,7 +240,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_load
+// CHECK-LABEL: util.func public @tensor_load
// CHECK: flow.tensor.load %arg0[%arg2, %arg3] : tensor<?x3xf32>{%arg1}
func.func @tensor_load(%arg0 : tensor<?x3xf32>, %arg1 : index, %arg2 : index, %arg3 : index) -> f32 {
%0 = iree_input.tensor.load %arg0[%arg2, %arg3] : tensor<?x3xf32>{%arg1}
@@ -234,7 +248,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_store
+// CHECK-LABEL: util.func public @tensor_store
// CHECK: flow.tensor.store %arg4, %arg0[%arg2, %arg3] : tensor<?x3xf32>{%arg1}
func.func @tensor_store(%arg0 : tensor<?x3xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : f32) {
iree_input.tensor.store %arg4, %arg0[%arg2, %arg3] : tensor<?x3xf32>{%arg1}
@@ -242,7 +256,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_splat
+// CHECK-LABEL: util.func public @tensor_splat
// CHECK: flow.tensor.splat %arg0 : tensor<?x?xf32>{%arg1, %arg2}
func.func @tensor_splat(%arg0 : f32, %arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
%0 = iree_input.tensor.splat %arg0 : tensor<?x?xf32>{%arg1, %arg2}
@@ -250,7 +264,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_clone
+// CHECK-LABEL: util.func public @tensor_clone
// CHECK: flow.tensor.clone %arg0 : tensor<?x?xf32>{%arg1, %arg2}
func.func @tensor_clone(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
%0 = iree_input.tensor.clone %arg0 : tensor<?x?xf32>{%arg1, %arg2}
@@ -258,7 +272,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_slice
+// CHECK-LABEL: util.func public @tensor_slice
// CHECK: flow.tensor.slice %arg0[%arg1 for %arg2] : tensor<?xf32>{%arg3} -> tensor<?xf32>{%arg4}
func.func @tensor_slice(%arg0 : tensor<?xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> tensor<?xf32> {
%0 = iree_input.tensor.slice %arg0[%arg1 for %arg2] : tensor<?xf32>{%arg3} -> tensor<?xf32>{%arg4}
@@ -266,7 +280,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_update
+// CHECK-LABEL: util.func public @tensor_update
// CHECK: flow.tensor.update %arg3, %arg0[%arg1] : tensor<?xf32>{%arg2} -> %arg0 as tensor<?xf32>{%arg4}
func.func @tensor_update(%arg0 : tensor<?xf32>, %arg1 : index, %arg2 : index, %arg3 : tensor<?xf32>, %arg4 : index) -> tensor<?xf32> {
%0 = iree_input.tensor.update %arg3, %arg0[%arg1] : tensor<?xf32>{%arg2} -> %arg0 as tensor<?xf32>{%arg4}
@@ -274,7 +288,7 @@
}
// -----
-// CHECK-LABEL: func.func @tensor_trace
+// CHECK-LABEL: util.func public @tensor_trace
// CHECK: flow.tensor.trace "FOOBAR" = [
// CHECK-SAME: %arg0 : tensor<5xf32>,
// CHECK-SAME: %arg1 : tensor<?x3xf32>{%arg2}
@@ -302,11 +316,11 @@
// CHECK: util.global public @global5 : tensor<4xi32>
iree_input.global @global5 initializer(@initializer) : tensor<4xi32>
// CHECK-NEXT: util.initializer {
- // CHECK-NEXT: %[[VALUE:.+]] = func.call @initializer() : () -> tensor<4xi32>
+ // CHECK-NEXT: %[[VALUE:.+]] = util.call @initializer() : () -> tensor<4xi32>
// CHECK-NEXT: util.global.store %[[VALUE]], @global5 : tensor<4xi32>
// CHECK-NEXT: util.return
// CHECK-NEXT: }
- // CHECK: func.func private @initializer() -> tensor<4xi32>
+ // CHECK: util.func private @initializer() -> tensor<4xi32>
func.func private @initializer() -> tensor<4xi32>
}
@@ -361,7 +375,7 @@
}
// -----
-// CHECK-LABEL: func.func @optimization_barrier
+// CHECK-LABEL: util.func public @optimization_barrier
// CHECK: util.optimization_barrier %arg0 : tensor<f32>
func.func @optimization_barrier(%arg0 : tensor<f32>) -> tensor<f32> {
%0 = iree_input.optimization_barrier %arg0 : tensor<f32>
@@ -391,9 +405,7 @@
#sm75 = #iree_input.executable.target<"cuda", "cuda-nvptx-fb", {
target_arch = "sm_75"
}>
-
builtin.module @executable_source {
-
iree_input.executable.source private @executable attributes {
objects = #iree_input.executable.objects<{
#sm75 = [#iree_input.executable.object<{path = "executable.ptx"}>]
@@ -409,7 +421,6 @@
workgroup_size = [64 : index, 1 : index, 1 : index]
}
}
-
func.func @dispatch(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
%c0 = arith.constant 0 : index
%0 = flow.dispatch @executable::@add[%c0](%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> %arg1
diff --git a/compiler/src/iree/compiler/Modules/Check/test/canonicalize.mlir b/compiler/src/iree/compiler/Modules/Check/test/canonicalize.mlir
index e5e6a3a..1afdfaa 100644
--- a/compiler/src/iree/compiler/Modules/Check/test/canonicalize.mlir
+++ b/compiler/src/iree/compiler/Modules/Check/test/canonicalize.mlir
@@ -4,20 +4,20 @@
// CHECK-LABEL: @expect_eq_const
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_eq_const(%lhs : tensor<2x2xi32>) {
+util.func public @expect_eq_const(%lhs : tensor<2x2xi32>) {
// CHECK: %[[C:.+]] = arith.constant dense<1> : tensor<2x2xi32>
// CHECK: check.expect_eq(%[[LHS]], %[[C]]) : tensor<2x2xi32>
check.expect_eq_const(%lhs, dense<1> : tensor<2x2xi32>) : tensor<2x2xi32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_almost_eq_const
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_almost_eq_const(%lhs : tensor<2x2xf32>) {
+util.func public @expect_almost_eq_const(%lhs : tensor<2x2xf32>) {
// CHECK: %[[C:.+]] = arith.constant dense<1.000000e+00> : tensor<2x2xf32>
// CHECK: check.expect_almost_eq(%[[LHS]], %[[C]]) : tensor<2x2xf32>
check.expect_almost_eq_const(%lhs, dense<1.0> : tensor<2x2xf32>) : tensor<2x2xf32>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/Check/test/ops.mlir b/compiler/src/iree/compiler/Modules/Check/test/ops.mlir
index 454e62a..97dc675 100644
--- a/compiler/src/iree/compiler/Modules/Check/test/ops.mlir
+++ b/compiler/src/iree/compiler/Modules/Check/test/ops.mlir
@@ -4,40 +4,40 @@
// CHECK-LABEL: @expect_true
// CHECK-SAME: %[[ARG:[a-zA-Z0-9$._-]+]]
-func.func @expect_true(%arg : i32) {
+util.func public @expect_true(%arg : i32) {
// CHECK: check.expect_true(%[[ARG]]) : i32
check.expect_true(%arg) : i32
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_false
// CHECK-SAME: %[[ARG:[a-zA-Z0-9$._-]+]]
-func.func @expect_false(%arg : i32) {
+util.func public @expect_false(%arg : i32) {
// CHECK: check.expect_false(%[[ARG]]) : i32
check.expect_false(%arg) : i32
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_all_true
// CHECK-SAME: %[[ARG:[a-zA-Z0-9$._-]+]]
-func.func @expect_all_true(%arg : !hal.buffer_view) {
+util.func public @expect_all_true(%arg : !hal.buffer_view) {
// CHECK: check.expect_all_true(%[[ARG]]) : !hal.buffer_view
check.expect_all_true(%arg) : !hal.buffer_view
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_all_true_tensor
// CHECK-SAME: %[[ARG:[a-zA-Z0-9$._-]+]]
-func.func @expect_all_true_tensor(%arg : tensor<2x2xi32>) {
+util.func public @expect_all_true_tensor(%arg : tensor<2x2xi32>) {
// CHECK: check.expect_all_true(%[[ARG]]) : tensor<2x2xi32>
check.expect_all_true(%arg) : tensor<2x2xi32>
- return
+ util.return
}
// -----
@@ -45,10 +45,10 @@
// CHECK-LABEL: @expect_eq
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
// CHECK-SAME: %[[RHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_eq(%lhs : !hal.buffer_view, %rhs : !hal.buffer_view) {
+util.func public @expect_eq(%lhs : !hal.buffer_view, %rhs : !hal.buffer_view) {
// CHECK: check.expect_eq(%[[LHS]], %[[RHS]]) : !hal.buffer_view
check.expect_eq(%lhs, %rhs) : !hal.buffer_view
- return
+ util.return
}
// -----
@@ -56,20 +56,20 @@
// CHECK-LABEL: @expect_eq_tensor
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
// CHECK-SAME: %[[RHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_eq_tensor(%lhs : tensor<2x2xi32>, %rhs : tensor<2x2xi32>) {
+util.func public @expect_eq_tensor(%lhs : tensor<2x2xi32>, %rhs : tensor<2x2xi32>) {
// CHECK: check.expect_eq(%[[LHS]], %[[RHS]]) : tensor<2x2xi32>
check.expect_eq(%lhs, %rhs) : tensor<2x2xi32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_eq_const
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_eq_const(%lhs : tensor<2x2xi32>) {
+util.func public @expect_eq_const(%lhs : tensor<2x2xi32>) {
// CHECK: check.expect_eq_const(%[[LHS]], dense<1> : tensor<2x2xi32>) : tensor<2x2xi32>
check.expect_eq_const(%lhs, dense<1> : tensor<2x2xi32>) : tensor<2x2xi32>
- return
+ util.return
}
// -----
@@ -77,10 +77,10 @@
// CHECK-LABEL: @expect_almost_eq
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
// CHECK-SAME: %[[RHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_almost_eq(%lhs : !hal.buffer_view, %rhs : !hal.buffer_view) {
+util.func public @expect_almost_eq(%lhs : !hal.buffer_view, %rhs : !hal.buffer_view) {
// CHECK: check.expect_almost_eq(%[[LHS]], %[[RHS]]) : !hal.buffer_view
check.expect_almost_eq(%lhs, %rhs) : !hal.buffer_view
- return
+ util.return
}
// -----
@@ -88,18 +88,18 @@
// CHECK-LABEL: @expect_almost_eq_tensor
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
// CHECK-SAME: %[[RHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_almost_eq_tensor(%lhs : tensor<2x2xf32>, %rhs : tensor<2x2xf32>) {
+util.func public @expect_almost_eq_tensor(%lhs : tensor<2x2xf32>, %rhs : tensor<2x2xf32>) {
// CHECK: check.expect_almost_eq(%[[LHS]], %[[RHS]]) : tensor<2x2xf32>
check.expect_almost_eq(%lhs, %rhs) : tensor<2x2xf32>
- return
+ util.return
}
// -----
// CHECK-LABEL: @expect_almost_eq_const
// CHECK-SAME: %[[LHS:[a-zA-Z0-9$._-]+]]
-func.func @expect_almost_eq_const(%lhs : tensor<2x2xf32>) {
+util.func public @expect_almost_eq_const(%lhs : tensor<2x2xf32>) {
// CHECK: check.expect_almost_eq_const(%[[LHS]], dense<1.000000e+00> : tensor<2x2xf32>) : tensor<2x2xf32>
check.expect_almost_eq_const(%lhs, dense<1.0> : tensor<2x2xf32>) : tensor<2x2xf32>
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_ops.mlir
index 815bd44..a817404 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @buffer_subspan
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer)
-func.func @buffer_subspan(%buffer: !hal.buffer) -> !hal.buffer {
+util.func public @buffer_subspan(%buffer: !hal.buffer) -> !hal.buffer {
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 100
%offset = arith.constant 100 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 200
@@ -10,24 +10,24 @@
// CHECK: %[[SUBSPAN:.+]] = hal_inline.buffer.subspan<%[[BUFFER]] : !hal.buffer>[%[[OFFSET]], %[[LENGTH]]] : !hal.buffer
%subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%offset, %length] : !hal.buffer
// CHECK: return %[[SUBSPAN]]
- return %subspan : !hal.buffer
+ util.return %subspan : !hal.buffer
}
// -----
// CHECK-LABEL: @buffer_length
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer)
-func.func @buffer_length(%buffer: !hal.buffer) -> index {
+util.func public @buffer_length(%buffer: !hal.buffer) -> index {
// CHECK: hal_inline.buffer.length<%[[BUFFER]] : !hal.buffer> : index
%length = hal.buffer.length<%buffer : !hal.buffer> : index
- return %length : index
+ util.return %length : index
}
// -----
// CHECK-LABEL: @buffer_load
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer)
-func.func @buffer_load(%buffer: !hal.buffer) -> i32 {
+util.func public @buffer_load(%buffer: !hal.buffer) -> i32 {
// CHECK-DAG: %[[REL_OFFSET:.+]] = arith.constant 100
%rel_offset = arith.constant 100 : index
// CHECK-DAG: %[[STORAGE:.+]] = hal_inline.buffer.storage<%[[BUFFER:.+]] : !hal.buffer> : !util.buffer
@@ -35,19 +35,19 @@
// CHECK: %[[VALUE:.+]] = util.buffer.load %[[STORAGE]][%[[REL_OFFSET]] for {{.+}}] : !util.buffer{%[[LENGTH]]} -> i32
%value = hal.buffer.load<%buffer : !hal.buffer>[%rel_offset] : i32
// CHECK-NEXT: return %[[VALUE]]
- return %value : i32
+ util.return %value : i32
}
// -----
// CHECK-LABEL: @buffer_store
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer, %[[VALUE:.+]]: i32)
-func.func @buffer_store(%buffer: !hal.buffer, %value: i32) {
+util.func public @buffer_store(%buffer: !hal.buffer, %value: i32) {
// CHECK-DAG: %[[REL_OFFSET:.+]] = arith.constant 100
%rel_offset = arith.constant 100 : index
// CHECK-DAG: %[[STORAGE:.+]] = hal_inline.buffer.storage<%[[BUFFER:.+]] : !hal.buffer> : !util.buffer
// CHECK-DAG: %[[LENGTH:.+]] = hal_inline.buffer.length<%[[BUFFER]] : !hal.buffer> : index
// CHECK: util.buffer.store %[[VALUE]], %[[STORAGE]][%[[REL_OFFSET]] for {{.+}}] : i32 -> !util.buffer{%[[LENGTH]]}
hal.buffer.store<%buffer : !hal.buffer>[%rel_offset] value(%value : i32)
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_view_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_view_ops.mlir
index ca71d84..aaa8937 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_view_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/HALToHALInline/test/buffer_view_ops.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --iree-hal-inline-conversion %s | FileCheck %s
// CHECK-LABEL: @buffer_view_create
-func.func @buffer_view_create(%arg0: !hal.buffer, %arg1: index, %arg2: index) -> !hal.buffer_view {
+util.func public @buffer_view_create(%arg0: !hal.buffer, %arg1: index, %arg2: index) -> !hal.buffer_view {
%c1 = arith.constant 1 : i32
%c32 = arith.constant 32 : i32
// CHECK: %view = hal_inline.buffer_view.create
@@ -13,25 +13,25 @@
shape([%arg1, %arg2])
type(%c32)
encoding(%c1) : !hal.buffer_view
- return %view : !hal.buffer_view
+ util.return %view : !hal.buffer_view
}
// -----
// CHECK-LABEL: @buffer_view_buffer
-func.func @buffer_view_buffer(%arg0: !hal.buffer_view) -> !hal.buffer {
+util.func public @buffer_view_buffer(%arg0: !hal.buffer_view) -> !hal.buffer {
// CHECK: %buffer = hal_inline.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
- return %buffer : !hal.buffer
+ util.return %buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @buffer_view_shape_queries
-func.func @buffer_view_shape_queries(%arg0: !hal.buffer_view) -> (index, index) {
+util.func public @buffer_view_shape_queries(%arg0: !hal.buffer_view) -> (index, index) {
// CHECK: %{{.+}} = hal_inline.buffer_view.rank<%arg0 : !hal.buffer_view> : index
%0 = hal.buffer_view.rank<%arg0 : !hal.buffer_view> : index
// CHECK: %{{.+}} = hal_inline.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
- return %0, %1 : index, index
+ util.return %0, %1 : index, index
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
index 12c1ba0..d0e643d 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/Patterns.cpp
@@ -15,7 +15,6 @@
#include "iree/compiler/Modules/HAL/Inline/IR/HALInlineDialect.h"
#include "iree/compiler/Modules/HAL/Inline/IR/HALInlineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Transforms/DialectConversion.h"
namespace mlir::iree_compiler {
@@ -497,8 +496,9 @@
llvm::append_range(callArgs, bindingBuffers);
llvm::append_range(callArgs, bindingOffsets);
llvm::append_range(callArgs, adaptor.getResourceLengths());
- rewriter.replaceOpWithNewOp<func::CallOp>(dispatchOp, callee, TypeRange{},
- callArgs);
+ rewriter.replaceOpWithNewOp<IREE::Util::CallOp>(
+ dispatchOp, TypeRange{}, callee.getLeafReference(), callArgs,
+ /*tied_operands=*/ArrayAttr{});
return success();
}
};
@@ -516,11 +516,12 @@
newResultTypes))) {
return rewriter.notifyMatchFailure(funcOp, "failed to convert types");
}
- auto newOp = rewriter.replaceOpWithNewOp<func::FuncOp>(
+ auto newOp = rewriter.replaceOpWithNewOp<IREE::Util::FuncOp>(
funcOp, funcOp.getName(),
rewriter.getFunctionType(newArgTypes, newResultTypes),
- funcOp.getSymVisibilityAttr(), funcOp.getAllArgAttrs(),
- funcOp.getAllResultAttrs());
+ /*tied_operands=*/ArrayAttr{}, funcOp.getSymVisibilityAttr(),
+ funcOp.getAllArgAttrs(), funcOp.getAllResultAttrs(),
+ IREE::Util::InliningPolicyAttrInterface{});
newOp->setDialectAttrs(funcOp->getDialectAttrs());
return success();
}
@@ -561,8 +562,9 @@
llvm::append_range(resultTypes, convertedTypes);
}
- rewriter.replaceOpWithNewOp<func::CallOp>(callOp, callOp.getCalleeAttr(),
- resultTypes, operands);
+ rewriter.replaceOpWithNewOp<IREE::Util::CallOp>(
+ callOp, resultTypes, callOp.getCallee(), operands,
+ /*tied_operands=*/ArrayAttr{});
return success();
}
};
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/cmd_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/cmd_ops.mlir
index cf6ff97..8c3d022 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/cmd_ops.mlir
@@ -4,7 +4,7 @@
// assume coherent memory.
// CHECK-LABEL: @cmdMemoryControl
-func.func @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
+util.func public @cmdMemoryControl(%arg0: !stream.resource<transient>, %arg1: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%fence = stream.cmd.execute with(%arg0 as %arg2: !stream.resource<transient>{%arg1}) {
@@ -13,14 +13,14 @@
stream.cmd.discard %arg2[%c0 for %c128] : !stream.resource<transient>{%arg1}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdFill
// CHECK-SAME: (%[[TARGET:.+]]: !util.buffer, %[[TARGET_SIZE:.+]]: index)
-func.func @cmdFill(%target: !stream.resource<transient>, %target_size: index) -> !stream.timepoint {
+util.func public @cmdFill(%target: !stream.resource<transient>, %target_size: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 128
%length = arith.constant 128 : index
@@ -31,14 +31,14 @@
stream.cmd.fill %value, %target_inner[%c0 for %length] : i32 -> !stream.resource<transient>{%target_size}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdCopy
// CHECK-SAME: (%[[SRC:.+]]: !util.buffer, %[[SRC_SIZE:.+]]: index, %[[DST:.+]]: !util.buffer, %[[DST_SIZE:.+]]: index)
-func.func @cmdCopy(%src: !stream.resource<transient>, %src_size: index,
+util.func public @cmdCopy(%src: !stream.resource<transient>, %src_size: index,
%dst: !stream.resource<staging>, %dst_size: index) -> !stream.timepoint {
// CHECK-DAG: %[[SRC_OFFSET:.+]] = arith.constant 100
%src_offset = arith.constant 100 : index
@@ -52,13 +52,13 @@
stream.cmd.copy %src_inner[%src_offset], %dst_inner[%dst_offset], %length : !stream.resource<transient>{%src_size} -> !stream.resource<staging>{%dst_size}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
// -----
// CHECK-LABEL: @cmdExecute
-func.func @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
+util.func public @cmdExecute(%arg0: !stream.resource<transient>, %arg1: index, %arg2: !stream.resource<staging>, %arg3: index, %arg4: !stream.timepoint) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%fence = stream.cmd.execute await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
@@ -78,13 +78,13 @@
}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
// -----
// Provided by the iree-hal-inline-executables pass:
-func.func private @__dispatch_ex_dispatch(
+util.func private @__dispatch_ex_dispatch(
index, index, // workload[2]
i32, i32, // pushConstants[2]
!util.buffer, !util.buffer, // bindingBuffers[2]
@@ -97,7 +97,7 @@
// CHECK-LABEL: @cmdDispatch
// CHECK-SAME: (%[[BUFFER0:.+]]: !util.buffer, %[[BUFFER0_SIZE:.+]]: index,
// CHECK-SAME: %[[BUFFER1:.+]]: !hal.buffer, %[[BUFFER1_SIZE:.+]]: index)
-func.func @cmdDispatch(%buffer0: !stream.resource<transient>, %buffer0_size: index,
+util.func public @cmdDispatch(%buffer0: !stream.resource<transient>, %buffer0_size: index,
%buffer1: !stream.resource<external>, %buffer1_size: index) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -112,7 +112,7 @@
%fence = stream.cmd.execute with(%buffer0 as %buffer0_inner: !stream.resource<transient>{%buffer0_size},
%buffer1 as %buffer1_inner: !stream.resource<external>{%buffer1_size}) {
// CHECK: %[[BUFFER1_STORAGE:.+]] = hal_inline.buffer.storage<%[[BUFFER1]]
- // CHECK: call @__dispatch_ex_dispatch(
+ // CHECK: util.call @__dispatch_ex_dispatch(
// CHECK-SAME: %c1, %c2,
// CHECK-SAME: %c4_i32, %c5_i32,
// CHECK-SAME: %[[BUFFER0]], %[[BUFFER1_STORAGE]],
@@ -127,7 +127,7 @@
}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
// -----
@@ -136,11 +136,11 @@
// Note that we get a buffer + offset + length for each resource but unlike the
// full HAL path there's no command buffer passed in.
-// CHECK: func.func private @cmdFunc(!util.buffer, index, index, i32, !util.buffer, index, index, !custom.type, !util.buffer, index, index)
+// CHECK: util.func private @cmdFunc(%arg0: !util.buffer, %arg1: index, %arg2: index, %arg3: i32, %arg4: !util.buffer, %arg5: index, %arg6: index, %arg7: !custom.type, %arg8: !util.buffer, %arg9: index, %arg10: index)
stream.cmd.func private @cmdFunc(%arg0[%arg1 for %arg2]: !stream.resource<*>, %arg3: i32, %arg4[%arg5 for %arg6]: !stream.resource<*>, %arg7: !custom.type, %arg8[%arg9 for %arg10]: !stream.resource<*>)
// CHECK-LABEL: @cmdCall
-func.func @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<transient>, %arg3: !custom.type, %arg4: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @cmdCall(%arg0: !stream.resource<external>, %arg1: i32, %arg2: !stream.resource<transient>, %arg3: !custom.type, %arg4: !stream.resource<transient>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
// CHECK-DAG: %[[SIZE0:.+]] = arith.constant 100
%size0 = arith.constant 100 : index
@@ -150,9 +150,9 @@
%size2 = arith.constant 102 : index
// CHECK-DAG: %[[ARG0_STORAGE:.+]] = hal_inline.buffer.storage<%arg0 : !hal.buffer> : !util.buffer
%timepoint = stream.cmd.execute with(%arg0 as %stream0: !stream.resource<external>{%size0}, %arg2 as %stream1: !stream.resource<transient>{%size1}, %arg4 as %stream2: !stream.resource<transient>{%size2}) {
- // CHECK: call @cmdFunc(%[[ARG0_STORAGE]], %c0, %[[SIZE0]], %arg1, %arg2, %c0, %[[SIZE1]], %arg3, %arg4, %c0, %[[SIZE2]]) :
+ // CHECK: util.call @cmdFunc(%[[ARG0_STORAGE]], %c0, %[[SIZE0]], %arg1, %arg2, %c0, %[[SIZE1]], %arg3, %arg4, %c0, %[[SIZE2]]) :
// CHECK-SAME: (!util.buffer, index, index, i32, !util.buffer, index, index, !custom.type, !util.buffer, index, index) -> ()
stream.cmd.call @cmdFunc(ro %stream0[%c0 for %size0], %arg1, rw %stream1[%c0 for %size1], %arg3, wo %stream2[%c0 for %size2]) : (!stream.resource<external>{%size0}, i32, !stream.resource<transient>{%size1}, !custom.type, !stream.resource<transient>{%size2}) -> ()
} => !stream.timepoint
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/debug_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/debug_ops.mlir
index 24d3565..4ede010 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/debug_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/debug_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @tensorTrace
// CHECK-SAME: (%[[TENSOR0_STORAGE:.+]]: !util.buffer, %[[TENSOR0_SIZE:.+]]: index, %[[TENSOR1_STORAGE:.+]]: !util.buffer, %[[TENSOR1_SIZE:.+]]: index, %[[TENSOR1_DIM0:.+]]: index)
-func.func @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index) {
+util.func public @tensorTrace(%tensor0: !stream.resource<staging>, %tensor0_size: index, %tensor1: !stream.resource<staging>, %tensor1_size: index, %tensor1_dim0: index) {
// CHECK-DAG: %[[TENSOR0_BUFFER:.+]] = hal_inline.buffer.wrap source(%[[TENSOR0_STORAGE]] : !util.buffer)[%c0, %[[TENSOR0_SIZE]]] : !hal.buffer
// CHECK-DAG: %[[TENSOR0:.+]] = hal_inline.buffer_view.create buffer(%[[TENSOR0_BUFFER]] : !hal.buffer)[%c0{{.*}}, %[[TENSOR0_SIZE]]] shape([%c5, %c3])
// CHECK-DAG: %[[TENSOR1_BUFFER:.+]] = hal_inline.buffer.wrap source(%[[TENSOR1_STORAGE]] : !util.buffer)[%c0, %[[TENSOR1_SIZE]]] : !hal.buffer
@@ -12,5 +12,5 @@
%tensor0 : tensor<5x3xf32> in !stream.resource<staging>{%tensor0_size},
%tensor1 : tensor<?x5xf32>{%tensor1_dim0} in !stream.resource<staging>{%tensor1_size}
]
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/file_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/file_ops.mlir
index 69ce43f..58ab268 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/file_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/file_ops.mlir
@@ -7,7 +7,7 @@
// CHECK-LABEL: @file_constant
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer) -> !util.buffer
-func.func @file_constant(%buffer: !util.buffer) -> !stream.file {
+util.func public @file_constant(%buffer: !util.buffer) -> !stream.file {
%c0 = arith.constant 0 : index
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -15,28 +15,28 @@
// CHECK: %[[SPAN:.+]] = util.buffer.subspan %[[BUFFER]][%c100] : !util.buffer{%c300} -> !util.buffer{%c200}
%file = stream.file.constant %buffer[%c100 for %c200] : !util.buffer{%c300} -> !stream.file
// CHECK: return %[[SPAN]]
- return %file : !stream.file
+ util.return %file : !stream.file
}
// -----
// CHECK-LABEL: @file_read
// CHECK-SAME: (%[[WAIT:.+]]: i64, %[[FILE:.+]]: !util.buffer, %[[RESOURCE:.+]]: !util.buffer)
-func.func @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
+util.func public @file_read(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%offset = arith.constant 100 : i64
%c1088 = arith.constant 1088 : index
// CHECK: %[[SIGNAL:.+]] = arith.constant 0 : i64
%signal = stream.file.read await(%wait) => %file[%offset], %resource[%c0], %c1088 : !stream.file -> !stream.resource<variable>{%c1088} => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %signal : !stream.timepoint
+ util.return %signal : !stream.timepoint
}
// -----
// CHECK-LABEL: @file_write
// CHECK-SAME: (%[[WAIT:.+]]: i64, %[[FILE:.+]]: !util.buffer, %[[RESOURCE:.+]]: !util.buffer)
-func.func @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
+util.func public @file_write(%wait: !stream.timepoint, %file: !stream.file, %resource: !stream.resource<variable>) -> !stream.timepoint {
%c0 = arith.constant 0 : index
%offset = arith.constant 100 : i64
%c1088 = arith.constant 1088 : index
@@ -45,14 +45,14 @@
// CHECK: %[[SIGNAL:.+]] = arith.constant 0 : i64
%signal = stream.file.write await(%wait) => %resource[%c0], %file[%offset], %c1088 : !stream.resource<variable>{%c1088} -> !stream.file => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %signal : !stream.timepoint
+ util.return %signal : !stream.timepoint
}
// -----
// CHECK-LABEL: @variable_read
// CHECK-SAME: (%[[WAIT:.+]]: i64) -> (!util.buffer, i64)
-func.func @variable_read(%wait: !stream.timepoint) -> (!stream.resource<variable>, !stream.timepoint) {
+util.func public @variable_read(%wait: !stream.timepoint) -> (!stream.resource<variable>, !stream.timepoint) {
%c0 = arith.constant 0 : index
%c16 = arith.constant 16 : index
%c32 = arith.constant 32 : index
@@ -69,5 +69,5 @@
// CHECK: %[[SIGNAL:.+]] = arith.constant 0 : i64
%signal = stream.file.read await(%wait) => %file[%c100], %resource[%c32], %c32 : !stream.file -> !stream.resource<variable>{%c64} => !stream.timepoint
// CHECK: return %[[STORAGE]], %[[SIGNAL]]
- return %resource, %signal : !stream.resource<variable>, !stream.timepoint
+ util.return %resource, %signal : !stream.resource<variable>, !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/resource_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/resource_ops.mlir
index 9de6d8f..f6326b5 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/resource_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/resource_ops.mlir
@@ -2,35 +2,35 @@
// CHECK-LABEL: @resourceAlloc
// CHECK-SAME: (%[[LENGTH:.+]]: index)
-func.func @resourceAlloc(%length: index) -> !stream.resource<transient> {
+util.func public @resourceAlloc(%length: index) -> !stream.resource<transient> {
// CHECK: %[[BUFFER:.+]], %[[STORAGE:.+]] = hal_inline.buffer.allocate alignment(%c64) : !hal.buffer{%[[LENGTH]]}
%result = stream.resource.alloc uninitialized : !stream.resource<transient>{%length}
// CHECK: return %[[STORAGE]]
- return %result : !stream.resource<transient>
+ util.return %result : !stream.resource<transient>
}
// -----
// CHECK-LABEL: @resourceAlloca
// CHECK-SAME: (%[[LENGTH:.+]]: index)
-func.func @resourceAlloca(%length: index) -> (!stream.resource<staging>, !stream.timepoint) {
+util.func public @resourceAlloca(%length: index) -> (!stream.resource<staging>, !stream.timepoint) {
// CHECK: %[[BUFFER:.+]], %[[STORAGE:.+]] = hal_inline.buffer.allocate alignment(%c64) : !hal.buffer{%[[LENGTH]]}
%0:2 = stream.resource.alloca uninitialized : !stream.resource<staging>{%length} => !stream.timepoint
// CHECK: %[[IMMEDIATE:.+]] = arith.constant 0 : i64
// CHECK: return %[[STORAGE]], %[[IMMEDIATE]]
- return %0#0, %0#1 : !stream.resource<staging>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<staging>, !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceAllocaAwait
// CHECK-SAME: (%[[LENGTH:.+]]: index, %[[TIMEPOINT:.+]]: i64)
-func.func @resourceAllocaAwait(%length: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<staging>, !stream.timepoint) {
+util.func public @resourceAllocaAwait(%length: index, %await_timepoint: !stream.timepoint) -> (!stream.resource<staging>, !stream.timepoint) {
// CHECK: %[[BUFFER:.+]], %[[STORAGE:.+]] = hal_inline.buffer.allocate alignment(%c64) : !hal.buffer{%[[LENGTH]]}
%0:2 = stream.resource.alloca uninitialized await(%await_timepoint) => !stream.resource<staging>{%length} => !stream.timepoint
// CHECK: %[[IMMEDIATE:.+]] = arith.constant 0 : i64
// CHECK: return %[[STORAGE]], %[[IMMEDIATE]]
- return %0#0, %0#1 : !stream.resource<staging>, !stream.timepoint
+ util.return %0#0, %0#1 : !stream.resource<staging>, !stream.timepoint
}
// -----
@@ -38,11 +38,11 @@
// NOTE: we don't do anything with deallocs today but could add a discard op.
// CHECK-LABEL: @resourceDealloca
-func.func @resourceDealloca(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) -> !stream.timepoint {
+util.func public @resourceDealloca(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) -> !stream.timepoint {
%0 = stream.resource.dealloca %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
// CHECK: %[[IMMEDIATE:.+]] = arith.constant 0 : i64
// CHECK: return %[[IMMEDIATE]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
@@ -50,28 +50,28 @@
// NOTE: we don't do anything with deallocs today but could add a discard op.
// CHECK-LABEL: @resourceDeallocaAwait
-func.func @resourceDeallocaAwait(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) -> !stream.timepoint {
+util.func public @resourceDeallocaAwait(%arg0: index, %arg1: !stream.resource<staging>, %arg2: !stream.timepoint) -> !stream.timepoint {
%0 = stream.resource.dealloca await(%arg2) => %arg1 : !stream.resource<staging>{%arg0} => !stream.timepoint
// CHECK: %[[IMMEDIATE:.+]] = arith.constant 0 : i64
// CHECK: return %[[IMMEDIATE]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @resourceSize
-func.func @resourceSize(%arg0: !stream.resource<transient>) -> index {
+util.func public @resourceSize(%arg0: !stream.resource<transient>) -> index {
// CHECK: %[[SIZE:.+]] = util.buffer.size %arg0
%0 = stream.resource.size %arg0 : !stream.resource<transient>
// CHECK: return %[[SIZE]]
- return %0 : index
+ util.return %0 : index
}
// -----
// CHECK-LABEL: @resourceTryMap
// CHECK-SAME: (%[[SOURCE:.+]]: !util.buffer)
-func.func @resourceTryMap(%source: !util.buffer) -> (i1, !stream.resource<constant>) {
+util.func public @resourceTryMap(%source: !util.buffer) -> (i1, !stream.resource<constant>) {
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 100
%offset = arith.constant 100 : index
// CHECK-DAG: %[[LENGTH:.+]] = arith.constant 128
@@ -81,41 +81,41 @@
// CHECK-DAG: %[[DID_MAP:.+]] = arith.constant true
%did_map, %mapping = stream.resource.try_map %source[%offset] : !util.buffer -> i1, !stream.resource<constant>{%length}
// CHECK: return %[[DID_MAP]], %[[MAPPING]]
- return %did_map, %mapping : i1, !stream.resource<constant>
+ util.return %did_map, %mapping : i1, !stream.resource<constant>
}
// -----
// CHECK-LABEL: @resourceLoad
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[BUFFER_SIZE:.+]]: index, %[[OFFSET:.+]]: index)
-func.func @resourceLoad(%resource: !stream.resource<staging>, %resource_size: index, %offset: index) -> i32 {
+util.func public @resourceLoad(%resource: !stream.resource<staging>, %resource_size: index, %offset: index) -> i32 {
// CHECK: %[[VALUE:.+]] = util.buffer.load %[[BUFFER]][%[[OFFSET]] for {{.+}}] : !util.buffer{%[[BUFFER_SIZE]]} -> i32
%0 = stream.resource.load %resource[%offset] : !stream.resource<staging>{%resource_size} -> i32
// CHECK: return %[[VALUE]]
- return %0 : i32
+ util.return %0 : i32
}
// -----
// CHECK-LABEL: @resourceStore
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[BUFFER_SIZE:.+]]: index, %[[OFFSET:.+]]: index)
-func.func @resourceStore(%resource: !stream.resource<staging>, %resource_size: index, %offset: index) {
+util.func public @resourceStore(%resource: !stream.resource<staging>, %resource_size: index, %offset: index) {
// CHECK-DAG: %[[VALUE:.+]] = arith.constant 123
%value = arith.constant 123 : i32
// CHECK: util.buffer.store %[[VALUE]], %[[BUFFER]][%[[OFFSET]] for {{.+}}] : i32 -> !util.buffer{%[[BUFFER_SIZE]]}
stream.resource.store %value, %resource[%offset] : i32 -> !stream.resource<staging>{%resource_size}
- return
+ util.return
}
// -----
// CHECK-LABEL: @resourceSubview
// CHECK-SAME: (%[[BUFFER:.+]]: !util.buffer, %[[BUFFER_SIZE:.+]]: index)
-func.func @resourceSubview(%resource: !stream.resource<transient>, %resource_size: index) -> !stream.resource<transient> {
+util.func public @resourceSubview(%resource: !stream.resource<transient>, %resource_size: index) -> !stream.resource<transient> {
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
// CHECK: %[[SUBSPAN:.+]] = util.buffer.subspan %[[BUFFER]][%c128] : !util.buffer{%[[BUFFER_SIZE]]} -> !util.buffer{%c256}
%0 = stream.resource.subview %resource[%c128] : !stream.resource<transient>{%resource_size} -> !stream.resource<transient>{%c256}
// CHECK: return %[[SUBSPAN]]
- return %0 : !stream.resource<transient>
+ util.return %0 : !stream.resource<transient>
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/timepoint_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/timepoint_ops.mlir
index aef6a09..67793ea 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/timepoint_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/timepoint_ops.mlir
@@ -6,43 +6,43 @@
// CHECK-LABEL: @rwTimepoint
// CHECK-SAME: = 0 : i64
util.global private mutable @rwTimepoint = #stream.timepoint<immediate>
-// CHECK: func.func @globalTimepoint(%arg0: i64) -> i64
-func.func @globalTimepoint(%arg0: !stream.timepoint) -> !stream.timepoint {
+// CHECK: util.func public @globalTimepoint(%arg0: i64) -> i64
+util.func public @globalTimepoint(%arg0: !stream.timepoint) -> !stream.timepoint {
// CHECK: util.global.store %arg0, @rwTimepoint
util.global.store %arg0, @rwTimepoint : !stream.timepoint
// CHECK: %[[VALUE:.+]] = util.global.load @rwTimepoint
%value = util.global.load @rwTimepoint : !stream.timepoint
// CHECK: return %[[VALUE]]
- return %value : !stream.timepoint
+ util.return %value : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointImmediate
-func.func @timepointImmediate() -> !stream.timepoint {
+util.func public @timepointImmediate() -> !stream.timepoint {
// CHECK: %[[TIMEPOINT:.+]] = arith.constant 0
%0 = stream.timepoint.immediate => !stream.timepoint
// CHECK: return %[[TIMEPOINT]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointJoin
-func.func @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
+util.func public @timepointJoin(%arg0: !stream.timepoint, %arg1: !stream.timepoint) -> !stream.timepoint {
// CHECK: %[[TIMEPOINT:.+]] = arith.constant 0
%0 = stream.timepoint.join max(%arg0, %arg1) => !stream.timepoint
// CHECK: return %[[TIMEPOINT]]
- return %0 : !stream.timepoint
+ util.return %0 : !stream.timepoint
}
// -----
// CHECK-LABEL: @timepointAwait
-func.func @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
+util.func public @timepointAwait(%arg0: !stream.timepoint, %arg1: !stream.resource<staging>, %arg2: !stream.resource<*>) -> (!stream.resource<staging>, !stream.resource<*>) {
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
%0:2 = stream.timepoint.await %arg0 => %arg1, %arg2 : !stream.resource<staging>{%c100}, !stream.resource<*>{%c200}
// CHECK: return %arg1, %arg2
- return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
+ util.return %0#0, %0#1 : !stream.resource<staging>, !stream.resource<*>
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/transfer_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/transfer_ops.mlir
index 2bbd473..8b6562a 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/transfer_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Conversion/StreamToHALInline/test/transfer_ops.mlir
@@ -2,10 +2,10 @@
// CHECK-LABEL: @tensorImportBuffer
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer, %[[RESOURCE_SIZE:.+]]: index, %[[DIM:.+]]: index) -> !hal.buffer
-func.func @tensorImportBuffer(%buffer: !hal.buffer, %resource_size: index, %dim: index) -> !stream.resource<external> {
+util.func public @tensorImportBuffer(%buffer: !hal.buffer, %resource_size: index, %dim: index) -> !stream.resource<external> {
%0 = stream.tensor.import %buffer : !hal.buffer -> tensor<?x5xf32>{%dim} in !stream.resource<external>{%resource_size}
// CHECK: return %[[BUFFER]]
- return %0 : !stream.resource<external>
+ util.return %0 : !stream.resource<external>
}
// -----
@@ -16,28 +16,28 @@
// CHECK-LABEL: @tensorImportBufferView
// CHECK-SAME: (%[[BUFFER_VIEW:.+]]: !hal.buffer_view, %[[RESOURCE_SIZE:.+]]: index, %[[DIM:.+]]: index) -> !hal.buffer
-func.func @tensorImportBufferView(%buffer_view: !hal.buffer_view, %resource_size: index, %dim: index) -> !stream.resource<external> {
+util.func public @tensorImportBufferView(%buffer_view: !hal.buffer_view, %resource_size: index, %dim: index) -> !stream.resource<external> {
// CHECK: %[[BUFFER:.+]] = hal_inline.buffer_view.buffer<%[[BUFFER_VIEW]] : !hal.buffer_view> : !hal.buffer
%0 = stream.tensor.import %buffer_view : !hal.buffer_view -> tensor<?x5xf32>{%dim} in !stream.resource<external>{%resource_size}
// CHECK: return %[[BUFFER]]
- return %0 : !stream.resource<external>
+ util.return %0 : !stream.resource<external>
}
// -----
// CHECK-LABEL: @tensorExportBuffer
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer, %[[RESOURCE_SIZE:.+]]: index, %[[DIM:.+]]: index) -> !hal.buffer
-func.func @tensorExportBuffer(%resource: !stream.resource<external>, %resource_size: index, %dim: index) -> !hal.buffer {
+util.func public @tensorExportBuffer(%resource: !stream.resource<external>, %resource_size: index, %dim: index) -> !hal.buffer {
%0 = stream.tensor.export %resource : tensor<?x1x10xf32>{%dim} in !stream.resource<external>{%resource_size} -> !hal.buffer
// CHECK: return %[[BUFFER]]
- return %0 : !hal.buffer
+ util.return %0 : !hal.buffer
}
// -----
// CHECK-LABEL: @tensorExportBufferView
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer, %[[RESOURCE_SIZE:.+]]: index, %[[DIM:.+]]: index) -> !hal.buffer
-func.func @tensorExportBufferView(%resource: !stream.resource<external>, %resource_size: index, %dim: index) -> !hal.buffer_view {
+util.func public @tensorExportBufferView(%resource: !stream.resource<external>, %resource_size: index, %dim: index) -> !hal.buffer_view {
// CHECK: %[[BUFFER_VIEW:.+]] = hal_inline.buffer_view.create
// CHECK-SAME: buffer(%[[BUFFER]] : !hal.buffer)
// CHECK-SAME: shape([%[[DIM]], %c1, %c10])
@@ -45,5 +45,5 @@
// CHECK-SAME: encoding(%c1_i32)
%0 = stream.tensor.export %resource : tensor<?x1x10xf32>{%dim} in !stream.resource<external>{%resource_size} -> !hal.buffer_view
// CHECK: return %[[BUFFER_VIEW]]
- return %0 : !hal.buffer_view
+ util.return %0 : !hal.buffer_view
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/test/buffer_folding.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/test/buffer_folding.mlir
index 0d0ad9f..f02fe9e 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/IR/test/buffer_folding.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/IR/test/buffer_folding.mlir
@@ -1,34 +1,34 @@
// RUN: iree-opt --split-input-file --canonicalize -cse %s | iree-opt --split-input-file | FileCheck %s
-// CHECK-LABEL: func @FoldBufferLengthOp
+// CHECK-LABEL: @FoldBufferLengthOp
// CHECK-SAME: (%[[LENGTH:.+]]: index)
-func.func @FoldBufferLengthOp(%length: index) -> index {
+util.func public @FoldBufferLengthOp(%length: index) -> index {
%c64 = arith.constant 64 : index
%buffer, %storage = hal_inline.buffer.allocate alignment(%c64) : !hal.buffer{%length} in !util.buffer
// CHECK-NOT: hal_inline.buffer.length
%queried_length = hal_inline.buffer.length<%buffer : !hal.buffer> : index
// CHECK: return %[[LENGTH]]
- return %queried_length : index
+ util.return %queried_length : index
}
// -----
-// CHECK-LABEL: func @FoldBufferStorageOp
-func.func @FoldBufferStorageOp(%length: index) -> !util.buffer {
+// CHECK-LABEL: @FoldBufferStorageOp
+util.func public @FoldBufferStorageOp(%length: index) -> !util.buffer {
%c64 = arith.constant 64 : index
// CHECK: %[[BUFFER:.+]], %[[STORAGE:.+]] = hal_inline.buffer.allocate
%buffer, %storage = hal_inline.buffer.allocate alignment(%c64) : !hal.buffer{%length} in !util.buffer
// CHECK-NOT: hal_inline.buffer.storage
%queried_storage = hal_inline.buffer.storage<%buffer : !hal.buffer> : !util.buffer
// CHECK: return %[[STORAGE]]
- return %queried_storage : !util.buffer
+ util.return %queried_storage : !util.buffer
}
// -----
// CHECK-LABEL: @FoldBufferViewCreateSubspan
// CHECK-SAME: (%[[BASE_BUFFER:.+]]: !hal.buffer, %[[SUBSPAN_OFFSET:.+]]: index, %[[SUBSPAN_LENGTH:.+]]: index)
-func.func @FoldBufferViewCreateSubspan(%base_buffer: !hal.buffer, %subspan_offset: index, %subspan_length: index) -> !hal.buffer_view {
+util.func public @FoldBufferViewCreateSubspan(%base_buffer: !hal.buffer, %subspan_offset: index, %subspan_length: index) -> !hal.buffer_view {
%subspan = hal_inline.buffer.subspan<%base_buffer : !hal.buffer>[%subspan_offset, %subspan_length] : !hal.buffer
// CHECK-DAG: %[[VIEW_OFFSET:.+]] = arith.constant 512
%view_offset = arith.constant 512 : index
@@ -44,14 +44,14 @@
shape([%dim0])
type(%type)
encoding(%encoding) : !hal.buffer_view
- return %view : !hal.buffer_view
+ util.return %view : !hal.buffer_view
}
// -----
-// CHECK-LABEL: func @SkipBufferViewBufferOp
+// CHECK-LABEL: @SkipBufferViewBufferOp
// CHECK-SAME: (%[[BUFFER:.+]]: !hal.buffer)
-func.func @SkipBufferViewBufferOp(%buffer: !hal.buffer) -> !hal.buffer {
+util.func public @SkipBufferViewBufferOp(%buffer: !hal.buffer) -> !hal.buffer {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : i32
%c10 = arith.constant 10 : index
@@ -64,5 +64,5 @@
encoding(%c1) : !hal.buffer_view
%view_buffer = hal_inline.buffer_view.buffer<%view : !hal.buffer_view> : !hal.buffer
// CHECK: return %[[BUFFER]]
- return %view_buffer : !hal.buffer
+ util.return %view_buffer : !hal.buffer
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
index 95173a6..f317326 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
@@ -115,7 +115,7 @@
innerModuleBuilder.getFunctionType(inputTypes, {});
// Create the function and insert into the module.
- auto dispatchFuncOp = func::FuncOp::create(
+ auto dispatchFuncOp = IREE::Util::FuncOp::create(
exportOp.getLoc(),
("__dispatch_" + executableOp.getName() + "_" + exportOp.getName())
.str(),
@@ -127,7 +127,7 @@
// Build the dispatch function by calling the target function in a loop.
auto bodyFuncOp =
- innerSymbolTable.lookup<func::FuncOp>(exportOp.getName());
+ innerSymbolTable.lookup<FunctionOpInterface>(exportOp.getName());
if (!bodyFuncOp) {
return exportOp.emitOpError("missing body function");
}
@@ -181,7 +181,8 @@
// about the function signatures.
LogicalResult
rewriteWorkgroupSignature(IREE::HAL::PipelineLayoutAttr layoutAttr,
- size_t totalBindingCount, func::FuncOp bodyFuncOp) {
+ size_t totalBindingCount,
+ FunctionOpInterface bodyFuncOp) {
auto *entryBlock = &bodyFuncOp.front();
auto builder = OpBuilder::atBlockBegin(entryBlock);
auto indexType = builder.getIndexType();
@@ -333,8 +334,9 @@
// workgroup_count_x, workgroup_count_y, workgroup_count_z)
void buildDispatchFunc(IREE::HAL::ExecutableExportOp exportOp,
IREE::HAL::PipelineLayoutAttr layoutAttr,
- size_t totalBindingCount, func::FuncOp bodyFuncOp,
- func::FuncOp dispatchFuncOp) {
+ size_t totalBindingCount,
+ FunctionOpInterface bodyFuncOp,
+ FunctionOpInterface dispatchFuncOp) {
auto loc = exportOp.getLoc();
auto builder = OpBuilder::atBlockBegin(dispatchFuncOp.addEntryBlock());
IndexSet indexSet(loc, builder);
@@ -408,8 +410,9 @@
[&](OpBuilder &forXBuilder, Location loc, Value ix,
ValueRange iters) {
workgroupArgs[workgroupXYZOffset + 0] = ix;
- forXBuilder.create<func::CallOp>(loc, bodyFuncOp,
- workgroupArgs);
+ forXBuilder.create<func::CallOp>(
+ loc, bodyFuncOp.getNameAttr(),
+ bodyFuncOp.getResultTypes(), workgroupArgs);
forXBuilder.create<scf::YieldOp>(loc);
});
forYBuilder.create<scf::YieldOp>(loc);
@@ -417,7 +420,7 @@
forZBuilder.create<scf::YieldOp>(loc);
});
- builder.create<func::ReturnOp>(loc);
+ builder.create<IREE::Util::ReturnOp>(loc);
}
};
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
index 14f4d28..f88d821 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
@@ -29,7 +29,7 @@
util.global.store %buffer_cst, @global_constant : !util.buffer
util.return
}
- func.func @dispatch_0(
+ func.func public @dispatch_0(
%local_memory: !util.buffer,
%constants: !util.buffer,
%bindings: !util.list<!util.buffer>,
@@ -68,7 +68,7 @@
%scaled = arith.mulf %mul, %constant1_f32 : f32
util.buffer.store %scaled, %buffer2[%idx for %c4] : f32 -> !util.buffer{%buffer2_size}
}
- return
+ func.return
}
}
}
@@ -83,9 +83,9 @@
// CHECK: util.global.store %[[CONSTANT]], @global_constant
// Ensures that we properly rename the dispatch function we inline:
-func.func private @dispatch_0()
+util.func private @dispatch_0()
-// CHECK-LABEL: func private @dispatch_0_0
+// CHECK-LABEL: func.func private @dispatch_0_0
// CHECK-SAME: (%[[LOCAL_MEMORY:.+]]: !util.buffer, %[[CONSTANT0:.+]]: i32, %[[CONSTANT1:.+]]: i32,
// CHECK-SAME: %[[BINDING0:.+]]: !util.buffer, %[[BINDING1:.+]]: !util.buffer, %[[BINDING2:.+]]: !util.buffer,
// CHECK-SAME: %[[X:[a-z0-9]+]]: index, %[[Y:[a-z0-9]+]]: index, %[[Z:[a-z0-9]+]]: index,
@@ -117,7 +117,7 @@
// CHECK: util.buffer.store %[[SCALED]], %[[BINDING2]][%[[ELEMENT_OFFSET]] for {{.+}}] : f32 -> !util.buffer{%[[BINDING2_SIZE]]}
// CHECK: return
-// CHECK-LABEL: func private @__dispatch_ex_dispatch_0
+// CHECK-LABEL: util.func private @__dispatch_ex_dispatch_0
// CHECK-SAME: (%[[WORKLOAD_X:.+]]: index, %[[WORKLOAD_Y:.+]]: index, %[[CONSTANT0:.+]]: i32, %[[CONSTANT1:.+]]: i32,
// CHECK-SAME: %[[BINDING0:.+]]: !util.buffer, %[[BINDING1:.+]]: !util.buffer, %[[BINDING2:.+]]: !util.buffer,
// CHECK-SAME: %[[OFFSET0:[a-z0-9]+]]: index, %[[OFFSET1:[a-z0-9]+]]: index, %[[OFFSET2:[a-z0-9]+]]: index,
@@ -151,13 +151,13 @@
// CHECK-SAME: %[[X]], %[[Y]], %[[Z]],
// CHECK-SAME: %[[SIZE_XYZ]], %[[SIZE_XYZ]], %[[SIZE_XYZ]],
// CHECK-SAME: %[[COUNT_X]], %[[COUNT_Y]], %[[COUNT_Z]])
-// CHECK: return
+// CHECK: util.return
// CHECK-LABEL: @dispatch0
// CHECK-SAME: (%[[RESOURCE0:.+]]: !stream.resource<constant>,
// CHECK-SAME: %[[RESOURCE1:.+]]: !stream.resource<transient>,
// CHECK-SAME: %[[RESOURCE2:.+]]: !stream.resource<external>)
-func.func private @dispatch0(%resource0: !stream.resource<constant>, %resource1: !stream.resource<transient>, %resource2: !stream.resource<external>) {
+util.func private @dispatch0(%resource0: !stream.resource<constant>, %resource1: !stream.resource<transient>, %resource2: !stream.resource<external>) {
%workload_x = arith.constant 1000 : index
%workload_y = arith.constant 1001 : index
%constant0 = arith.constant 4 : i32
@@ -186,5 +186,5 @@
]
}
} => !stream.timepoint
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/test/executable_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/test/executable_ops.mlir
index 8328041..fb5e5f1 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/test/executable_ops.mlir
@@ -3,13 +3,13 @@
// CHECK-LABEL: @executableLoad
// CHECK-SAME: (%[[EXECUTABLE_DATA:.+]]: !vm.buffer)
-func.func @executableLoad(%executable_data: !util.buffer) -> !hal.executable {
+util.func public @executableLoad(%executable_data: !util.buffer) -> !hal.executable {
// CHECK-DAG: %[[CONSTANTS:.+]] = vm.const.ref.zero : !vm.buffer
// CHECK-DAG: %[[FORMAT_STR:.+]] = vm.rodata.inline {{.+}} : !vm.buffer = "executable_format"
// CHECK: %[[EXECUTABLE:.+]] = vm.call @hal_loader.executable.load(%[[FORMAT_STR]], %[[EXECUTABLE_DATA]], %[[CONSTANTS]])
%executable = hal_loader.executable.load format("executable_format") data(%executable_data) : !hal.executable
// CHECK: return %[[EXECUTABLE]]
- return %executable : !hal.executable
+ util.return %executable : !hal.executable
}
// -----
@@ -17,7 +17,7 @@
// CHECK-LABEL: @executableDispatch
// CHECK-SAME: (%[[EXECUTABLE:.+]]: !vm.ref<!hal.executable>,
// CHECK-SAME: %[[BUFFER0:.+]]: !vm.buffer, %[[BUFFER1:.+]]: !vm.buffer)
-func.func @executableDispatch(%executable: !hal.executable, %buffer0: !util.buffer, %buffer1: !util.buffer) {
+util.func public @executableDispatch(%executable: !hal.executable, %buffer0: !util.buffer, %buffer1: !util.buffer) {
// CHECK-DAG: %[[COUNT_X:.+]] = vm.const.i32 1000
%count_x = arith.constant 1000 : index
// CHECK-DAG: %[[COUNT_Y:.+]] = vm.const.i32 1001
@@ -50,5 +50,5 @@
// CHECK-SAME: (%[[BUFFER1]], %[[OFFSET1]], %[[LENGTH1]])
(%buffer1 : !util.buffer)[%offset1, %length1]
])
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
index 7e5471d..08f834a 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
@@ -32,7 +32,7 @@
// CHECK-LABEL: @cmdDispatch
// CHECK-SAME: (%[[BUFFER0:.+]]: !util.buffer, %[[BUFFER0_SIZE:.+]]: index,
// CHECK-SAME: %[[BUFFER1:.+]]: !hal.buffer, %[[BUFFER1_SIZE:.+]]: index)
-func.func @cmdDispatch(%buffer0: !stream.resource<transient>, %buffer0_size: index,
+util.func public @cmdDispatch(%buffer0: !stream.resource<transient>, %buffer0_size: index,
%buffer1: !stream.resource<external>, %buffer1_size: index) -> !stream.timepoint {
// (ends up by the dispatch below)
%workload_x = arith.constant 1000 : index
@@ -87,5 +87,5 @@
}
} => !stream.timepoint
// CHECK: return %c0
- return %fence : !stream.timepoint
+ util.return %fence : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/test/dispatch_folding.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/test/dispatch_folding.mlir
index 150cecb..b47948a 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/test/dispatch_folding.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/test/dispatch_folding.mlir
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --canonicalize -cse %s | iree-opt --split-input-file | FileCheck %s
// CHECK-LABEL: @fold_binding_subspans_into_dispatch
-func.func @fold_binding_subspans_into_dispatch(
+util.func public @fold_binding_subspans_into_dispatch(
// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable,
%executable: !hal.executable,
// CHECK-SAME: %[[BUFFER:.+]]: !util.buffer, %[[SUBSPAN_OFFSET:.+]]: index, %[[SUBSPAN_LENGTH:.+]]: index
@@ -26,5 +26,5 @@
// CHECK: (%[[BUFFER]] : !util.buffer)[%[[ABSOLUTE_OFFSET]], %[[BINDING_LENGTH]]]
(%subspan : !util.buffer)[%binding_offset, %binding_length]
])
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/test/materialize_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/test/materialize_executables.mlir
index 5294d5a..cafb262 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/test/materialize_executables.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Transforms/test/materialize_executables.mlir
@@ -24,11 +24,11 @@
}
// CHECK-LABEL: @get_ex0
-func.func private @get_ex0() -> !hal.executable {
+util.func private @get_ex0() -> !hal.executable {
// CHECK: %[[EX0:.+]] = util.global.load @ex0 : !hal.executable
%ex0 = hal_loader.executable.lookup executable(@ex0) : !hal.executable
// CHECK: return %[[EX0]]
- return %ex0 : !hal.executable
+ util.return %ex0 : !hal.executable
}
// CHECK: util.global private @ex1 : !hal.executable
@@ -40,9 +40,9 @@
}
// CHECK-LABEL: @get_ex1
-func.func private @get_ex1() -> !hal.executable {
+util.func private @get_ex1() -> !hal.executable {
// CHECK: %[[EX1:.+]] = util.global.load @ex1 : !hal.executable
%ex1 = hal_loader.executable.lookup executable(@ex1) : !hal.executable
// CHECK: return %[[EX1]]
- return %ex1 : !hal.executable
+ util.return %ex1 : !hal.executable
}
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/ParamsToVM/test/parameter_ops.mlir b/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/ParamsToVM/test/parameter_ops.mlir
index 54ddd68..a570f9f 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/ParamsToVM/test/parameter_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/ParamsToVM/test/parameter_ops.mlir
@@ -3,7 +3,7 @@
// CHECK-LABEL: @parameterLoad
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64, %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>)
-func.func @parameterLoad(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence) -> (!hal.buffer, !hal.buffer) {
+util.func public @parameterLoad(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence) -> (!hal.buffer, !hal.buffer) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -23,14 +23,14 @@
// CHECK-DAG: %[[C1:.+]] = vm.const.i32 1
// CHECK-DAG: %[[TARGET_BUFFER1:.+]] = vm.list.get.ref %[[TARGET_BUFFERS]], %[[C1]]
// CHECK: return %[[TARGET_BUFFER0]], %[[TARGET_BUFFER1]]
- return %target_buffers#0, %target_buffers#1 : !hal.buffer, !hal.buffer
+ util.return %target_buffers#0, %target_buffers#1 : !hal.buffer, !hal.buffer
}
// -----
// CHECK-LABEL: @parameterLoadNoScope
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64, %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>)
-func.func @parameterLoadNoScope(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence) -> !hal.buffer {
+util.func public @parameterLoadNoScope(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence) -> !hal.buffer {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
// CHECK: %[[KEY_TABLE:.+]], %[[KEY_DATA:.+]] = vm.rodata.table.inline i32 : !vm.buffer, !vm.buffer = ["key"]
@@ -45,14 +45,14 @@
// CHECK-DAG: %[[C0:.+]] = vm.const.i32 0
// CHECK-DAG: %[[TARGET_BUFFER:.+]] = vm.list.get.ref %[[TARGET_BUFFERS]], %[[C0]]
// CHECK: return %[[TARGET_BUFFER]]
- return %target_buffer : !hal.buffer
+ util.return %target_buffer : !hal.buffer
}
// -----
// CHECK-LABEL: @parameterGather
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64, %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>, %[[TARGET_BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @parameterGather(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence, %target_buffer: !hal.buffer) {
+util.func public @parameterGather(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence, %target_buffer: !hal.buffer) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -73,14 +73,14 @@
"scope"::"key1"[%c51_i64] -> %target_buffer[%c101 for %c201] : !hal.buffer,
"scope"::"key2"[%c52_i64] -> %target_buffer[%c102 for %c202] : !hal.buffer
}
- return
+ util.return
}
// -----
// CHECK-LABEL: @parameterScatter
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64, %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>, %[[SOURCE_BUFFER:.+]]: !vm.ref<!hal.buffer>)
-func.func @parameterScatter(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence, %source_buffer: !hal.buffer) {
+util.func public @parameterScatter(%device: !hal.device, %queue_affinity: i64, %wait: !hal.fence, %signal: !hal.fence, %source_buffer: !hal.buffer) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -101,5 +101,5 @@
%source_buffer[%c101 for %c201] : !hal.buffer -> "scope"::"key1"[%c51_i64],
%source_buffer[%c102 for %c202] : !hal.buffer -> "scope"::"key2"[%c52_i64]
}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/StreamToParams/test/parameter_ops.mlir b/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/StreamToParams/test/parameter_ops.mlir
index 92035c3..4842e51 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/StreamToParams/test/parameter_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Conversion/StreamToParams/test/parameter_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @parameterLoad
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence) -> (!hal.buffer, !hal.buffer, !hal.fence)
-func.func @parameterLoad(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
+util.func public @parameterLoad(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.resource<constant>, !stream.timepoint) {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -20,14 +20,14 @@
"scope"::"key1"[%c51_i64] : !stream.resource<constant>{%c101}
} => !stream.timepoint
// CHECK: return %[[BUFFERS]]#0, %[[BUFFERS]]#1, %[[SIGNAL]]
- return %results#0, %results#1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
+ util.return %results#0, %results#1, %result_timepoint : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterLoadNoScope
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence) -> (!hal.buffer, !hal.fence)
-func.func @parameterLoadNoScope(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.timepoint) {
+util.func public @parameterLoadNoScope(%wait: !stream.timepoint) -> (!stream.resource<constant>, !stream.timepoint) {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
// CHECK-DAG: %[[DEVICE:.+]] = hal.devices.get %{{.+}}
@@ -41,14 +41,14 @@
"key"[%c50_i64] : !stream.resource<constant>{%c100}
} => !stream.timepoint
// CHECK: return %[[BUFFER]], %[[SIGNAL]]
- return %result, %result_timepoint : !stream.resource<constant>, !stream.timepoint
+ util.return %result, %result_timepoint : !stream.resource<constant>, !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterRead
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[TARGET:.+]]: !hal.buffer) -> !hal.fence
-func.func @parameterRead(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @parameterRead(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -61,14 +61,14 @@
// CHECK-NEXT: "scope"::"key"[%c50_i64] -> %[[TARGET]][%c100 for %c200] : !hal.buffer
%timepoint = stream.parameter.read await(%wait) => "scope"::"key"[%c50_i64] -> %target[%c100 for %c200] : !stream.resource<transient>{%c300} => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterWrite
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[SOURCE:.+]]: !hal.buffer) -> !hal.fence
-func.func @parameterWrite(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @parameterWrite(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c100 = arith.constant 100 : index
%c200 = arith.constant 200 : index
@@ -81,14 +81,14 @@
// CHECK-NEXT: %[[SOURCE]][%c100 for %c200] : !hal.buffer -> "scope"::"key"[%c50_i64]
%timepoint = stream.parameter.write await(%wait) => %source[%c100 for %c200] : !stream.resource<transient>{%c300} -> "scope"::"key"[%c50_i64] => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterGather
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[TARGET:.+]]: !hal.buffer) -> !hal.fence
-func.func @parameterGather(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @parameterGather(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -113,14 +113,14 @@
"scope"::"key2"[%c52_i64] -> %target[%c102 for %c202] : !stream.resource<transient>{%c300}
} => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterGatherNoScope
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[TARGET:.+]]: !hal.buffer) -> !hal.fence
-func.func @parameterGatherNoScope(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @parameterGatherNoScope(%wait: !stream.timepoint, %target: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c100 = arith.constant 100 : index
@@ -140,14 +140,14 @@
"key1"[%c51_i64] -> %target[%c101 for %c201] : !stream.resource<transient>{%c300}
} => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
// -----
// CHECK-LABEL: @parameterScatter
// CHECK-SAME: (%[[WAIT:.+]]: !hal.fence, %[[SOURCE:.+]]: !hal.buffer) -> !hal.fence
-func.func @parameterScatter(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
+util.func public @parameterScatter(%wait: !stream.timepoint, %source: !stream.resource<transient>) -> !stream.timepoint {
%c50_i64 = arith.constant 50 : i64
%c51_i64 = arith.constant 51 : i64
%c52_i64 = arith.constant 52 : i64
@@ -173,5 +173,5 @@
%source[%c102 for %c202] : !stream.resource<transient>{%c300} -> "scope"::"key2"[%c52_i64]
} => !stream.timepoint
// CHECK: return %[[SIGNAL]]
- return %timepoint : !stream.timepoint
+ util.return %timepoint : !stream.timepoint
}
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/IR/test/parameter_ops.mlir b/compiler/src/iree/compiler/Modules/IO/Parameters/IR/test/parameter_ops.mlir
index e60c73e..bf3f808 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/IR/test/parameter_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/IR/test/parameter_ops.mlir
@@ -2,7 +2,7 @@
// CHECK-LABEL: @parameterLoad
// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device, %[[WAIT:.+]]: !hal.fence, %[[SIGNAL:.+]]: !hal.fence)
-func.func @parameterLoad(%device: !hal.device, %wait: !hal.fence, %signal: !hal.fence) {
+util.func public @parameterLoad(%device: !hal.device, %wait: !hal.fence, %signal: !hal.fence) {
// CHECK-DAG: %[[AFFINITY:.+]] = arith.constant -1
%affinity = arith.constant -1 : i64
// CHECK-DAG: %[[OFFSET:.+]] = arith.constant 0
@@ -24,5 +24,5 @@
usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|SharingImmutable") {
"scope"::"w0"[%offset] : !hal.buffer{%length}
}
- return
+ util.return
}
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/Passes.td b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/Passes.td
index 61b2247..36d374f 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/Passes.td
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/Passes.td
@@ -33,7 +33,6 @@
def GenerateSplatParameterArchivePass :
Pass<"iree-io-generate-splat-parameter-archive", "mlir::ModuleOp"> {
let summary = "Generates a .irpa file with splat entries for all parameters";
- let dependentDialects = [];
let options = [
Option<"archivePath", "archive-path", "std::string",
/*default=*/"",
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/export_parameters.mlir b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/export_parameters.mlir
index 1a474d0..5354708 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/export_parameters.mlir
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/export_parameters.mlir
@@ -9,7 +9,7 @@
util.global private @array_global_0 = dense<[[11.0, 12.0]]> : tensor<1x2xf32>
util.global private @dense_global_1 = dense<"0x0000E040000000410000104100002041"> : tensor<2x2xf32>
util.global private @dense_global_2 = dense<"0x0000803F000000400000404000008040"> : tensor<2x2xf32>
- func.func @parameter_example(%arg0: tensor<1x2xf32>) -> tensor<1x2xf32> {
+ util.func public @parameter_example(%arg0: tensor<1x2xf32>) -> tensor<1x2xf32> {
%cst = arith.constant 0.000000e+00 : f32
%3 = util.global.load @array_global_0 : tensor<1x2xf32>
%4 = util.global.load @dense_global_1 : tensor<2x2xf32>
@@ -21,6 +21,6 @@
%10 = linalg.add ins(%8, %5 : tensor<1x2xf32>, tensor<1x2xf32>) outs(%empty : tensor<1x2xf32>) -> tensor<1x2xf32>
%12 = linalg.matmul ins(%10, %4 : tensor<1x2xf32>, tensor<2x2xf32>) outs(%fill : tensor<1x2xf32>) -> tensor<1x2xf32>
%14 = linalg.add ins(%12, %3 : tensor<1x2xf32>, tensor<1x2xf32>) outs(%empty : tensor<1x2xf32>) -> tensor<1x2xf32>
- return %14 : tensor<1x2xf32>
+ util.return %14 : tensor<1x2xf32>
}
}
diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/generate_splat_parameter_archive.mlir b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/generate_splat_parameter_archive.mlir
index c99d390..b77dfa8 100644
--- a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/generate_splat_parameter_archive.mlir
+++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/test/generate_splat_parameter_archive.mlir
@@ -11,7 +11,7 @@
util.global private @dense_global_1 = #stream.parameter.named<"model"::"global_1"> : tensor<2x2xi32>
util.global private @dense_global_2 = #stream.parameter.named<"model"::"global_2"> : tensor<1x2xi32>
util.global private @dense_global_3 = #stream.parameter.named<"model"::"global_3"> : tensor<2x2xi32>
- func.func @forward(%arg0: tensor<1x2xi32>) -> tensor<1x2xi32> {
+ util.func public @forward(%arg0: tensor<1x2xi32>) -> tensor<1x2xi32> {
%cst = arith.constant 0 : i32
%3 = util.global.load @array_global_0 : tensor<1x2xi32>
%4 = util.global.load @dense_global_1 : tensor<2x2xi32>
@@ -23,7 +23,7 @@
%10 = linalg.add ins(%8, %5 : tensor<1x2xi32>, tensor<1x2xi32>) outs(%empty : tensor<1x2xi32>) -> tensor<1x2xi32>
%12 = linalg.matmul ins(%10, %4 : tensor<1x2xi32>, tensor<2x2xi32>) outs(%fill : tensor<1x2xi32>) -> tensor<1x2xi32>
%14 = linalg.add ins(%12, %3 : tensor<1x2xi32>, tensor<1x2xi32>) outs(%empty : tensor<1x2xi32>) -> tensor<1x2xi32>
- return %14 : tensor<1x2xi32>
+ util.return %14 : tensor<1x2xi32>
}
}
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/test/external_function_spec.mlir b/compiler/src/iree/compiler/Preprocessing/Common/test/external_function_spec.mlir
index 579ea29..5152aca 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/test/external_function_spec.mlir
+++ b/compiler/src/iree/compiler/Preprocessing/Common/test/external_function_spec.mlir
@@ -1,18 +1,18 @@
// Test for importing functions from this spec to a payload module.
// Tested in `transform_symbol_importing.mlir`
module attributes {transform.with_named_sequence} {
- func.func private @some_external_function(%arg0: tensor<?xf32>) -> tensor<?xf32>
+ util.func private @some_external_function(%arg0: tensor<?xf32>) -> tensor<?xf32>
- func.func @some_function(%arg0: tensor<?xf32>) -> tensor<?xf32> {
- return %arg0 : tensor<?xf32>
+ util.func @some_function(%arg0: tensor<?xf32>) -> tensor<?xf32> {
+ util.return %arg0 : tensor<?xf32>
}
transform.named_sequence @__transform_main(%module: !transform.any_op) {
- %new_func = transform.iree.import_symbol @some_function into %module : (!transform.any_op) -> !transform.any_op
+ %new_func = transform.util.import_symbol @some_function into %module : (!transform.any_op) -> !transform.any_op
- %func = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.any_op
- %module_2 = transform.iree.get_nearest_symbol_table %func : (!transform.any_op) -> !transform.any_op
- %new_func_2 = transform.iree.import_symbol @some_external_function into %module_2 : (!transform.any_op) -> !transform.any_op
+ %func = transform.structured.match ops{["util.func"]} in %module : (!transform.any_op) -> !transform.any_op
+ %module_2 = transform.util.get_nearest_symbol_table %func : (!transform.any_op) -> !transform.any_op
+ %new_func_2 = transform.util.import_symbol @some_external_function into %module_2 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/test/transform_symbol_importing.mlir b/compiler/src/iree/compiler/Preprocessing/Common/test/transform_symbol_importing.mlir
index 77a6f5b..d77a4e3 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/test/transform_symbol_importing.mlir
+++ b/compiler/src/iree/compiler/Preprocessing/Common/test/transform_symbol_importing.mlir
@@ -5,6 +5,6 @@
}
// CHECK-LABEL: module @example
-// CHECK: func.func private @some_external_function(tensor<?xf32>) -> tensor<?xf32>
-// CHECK: func.func @some_function(%arg0: tensor<?xf32>) -> tensor<?xf32>
-// CHECK-NEXT: return %arg0 : tensor<?xf32>
+// CHECK: util.func private @some_external_function(%arg0: tensor<?xf32>) -> tensor<?xf32>
+// CHECK: util.func public @some_function(%arg0: tensor<?xf32>) -> tensor<?xf32>
+// CHECK-NEXT: util.return %arg0 : tensor<?xf32>
diff --git a/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.cpp b/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.cpp
index 71b03e9..23087ef 100644
--- a/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.cpp
+++ b/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.cpp
@@ -30,98 +30,6 @@
}
//===----------------------------------------------------------------------===//
-// GetNearestSymbolTableOp
-//===----------------------------------------------------------------------===//
-
-DiagnosedSilenceableFailure
-IREE::transform_dialect::GetNearestSymbolTableOp::applyToOne(
- transform::TransformRewriter &rewriter, Operation *target,
- transform::ApplyToEachResultList &results,
- transform::TransformState &state) {
- auto tableOp = SymbolTable::getNearestSymbolTable(target);
- if (!tableOp) {
- return emitDefaultDefiniteFailure(target);
- }
- results.push_back(tableOp);
- return DiagnosedSilenceableFailure::success();
-}
-
-void IREE::transform_dialect::GetNearestSymbolTableOp::getEffects(
- SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
- transform::onlyReadsHandle(getTarget(), effects);
- transform::producesHandle(getResult(), effects);
- transform::modifiesPayload(effects);
-}
-
-//===----------------------------------------------------------------------===//
-// ImportSymbolOp
-//===----------------------------------------------------------------------===//
-
-DiagnosedSilenceableFailure IREE::transform_dialect::ImportSymbolOp::apply(
- transform::TransformRewriter &rewriter,
- transform::TransformResults &transformResults,
- transform::TransformState &state) {
- auto symbolOp = SymbolTable::lookupNearestSymbolFrom(*this, getSymbol());
- if (!symbolOp) {
- return emitDefiniteFailure() << "could not find corresponding symbol op";
- }
- // Require isolated from above as the clone does not make sense with escaping
- // values.
- if (!symbolOp->hasTrait<OpTrait::IsIsolatedFromAbove>()) {
- return emitDefiniteFailure()
- << "target symbol op is not isolated from above";
- }
- StringRef symbol = getSymbol().getLeafReference();
- SmallVector<Operation *> results;
- for (Operation *payloadOp : state.getPayloadOps(getSymbolTable())) {
- if (!payloadOp->hasTrait<OpTrait::SymbolTable>()) {
- return emitDefiniteFailure()
- << "target symbol table " << payloadOp << " is not a symbol table";
- }
- SymbolTable symbolTable(payloadOp);
-
- if (Operation *preExistingSymbolOp = symbolTable.lookup(symbol)) {
- if (getForceImport()) {
- // If we want to overwrite pre-existing symbols, just erase it here.
- symbolTable.erase(preExistingSymbolOp);
- } else if (getIfUndefined()) {
- // Skip if we want to use the symbol that is already there.
- results.push_back(preExistingSymbolOp);
- continue;
- } else {
- return emitDefiniteFailure()
- << "target symbol " << symbol << " is already defined";
- }
- }
-
- // Symbol table ops must have exactly one region with exactly one block.
- // Simply clone the target symbol op into the single block.
- rewriter.setInsertionPointToStart(&payloadOp->getRegion(0).front());
- results.push_back(rewriter.clone(*symbolOp));
- }
- transformResults.set(cast<OpResult>(getClonedSymbol()), results);
- return DiagnosedSilenceableFailure::success();
-}
-
-void IREE::transform_dialect::ImportSymbolOp::getEffects(
- SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
- transform::onlyReadsHandle(getSymbolTable(), effects);
- transform::producesHandle(getClonedSymbol(), effects);
- transform::modifiesPayload(effects);
-}
-
-LogicalResult IREE::transform_dialect::ImportSymbolOp::verify() {
- if (getForceImport() && getIfUndefined()) {
- return emitOpError()
- << "force_import and if_undefined are mutually exclusive";
- }
- if (!SymbolTable::lookupNearestSymbolFrom(*this, getSymbol())) {
- return emitOpError() << "invalid import of undefined symbol";
- }
- return success();
-}
-
-//===----------------------------------------------------------------------===//
// MatchCastCompatibleDagFromRootOp
//===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensionsOps.td b/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensionsOps.td
index de0232f..af7eba0 100644
--- a/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensionsOps.td
+++ b/compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensionsOps.td
@@ -15,74 +15,6 @@
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/OpBase.td"
-def GetNearestSymbolTableOp : Op<Transform_Dialect, "iree.get_nearest_symbol_table",
- [FunctionalStyleTransformOpTrait,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
- TransformOpInterface,
- TransformEachOpTrait,
- ReportTrackingListenerFailuresOpTrait]> {
- let description = [{
- Returns the nearest symbol table op for each op in the payload, inclusive.
-
- #### Return modes
-
- This operation reads the `target` handle and produces the `result`
- handle. This operation emits a definite failure if the nearest symbol table
- is unknown.
- }];
-
- let arguments = (ins TransformHandleTypeInterface:$target);
- let results = (outs TransformHandleTypeInterface:$result);
-
- let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
- let cppNamespace = "mlir::iree_compiler::IREE::transform_dialect";
- let extraClassDeclaration = [{
- ::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::transform::TransformRewriter &rewriter,
- ::mlir::Operation* target,
- ::mlir::transform::ApplyToEachResultList &results,
- ::mlir::transform::TransformState &state);
- }];
-}
-
-def ImportSymbolOp : Op<Transform_Dialect, "iree.import_symbol",
- [FunctionalStyleTransformOpTrait,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
- DeclareOpInterfaceMethods<TransformOpInterface>,
- ReportTrackingListenerFailuresOpTrait]> {
- let description = [{
- Clones the op defined by the given symbol into the given symbol table and
- returns the cloned symbol. If `force_import` is set, this will (unsafely)
- overwrite any pre-existing definitions of the same symbol. If
- `if_undefined` is set, this will return a handle to the pre-existing symbol
- in the payload if found instead of failing.
-
- #### Return modes
-
- This operation reads the `symbol_table` handle and produces the
- `cloned_symbol` handle. This operation emits a definite failure if the if
- the `symbol_table` op does not define a symbol table.
-
- This will emit a definite failure if the symbol already exists in the
- symbol table and neither `force_import` and `if_undefined` are set.
- }];
-
- let arguments = (ins SymbolRefAttr:$symbol,
- UnitAttr:$if_undefined,
- UnitAttr:$force_import,
- TransformHandleTypeInterface:$symbol_table);
- let results = (outs TransformHandleTypeInterface:$cloned_symbol);
-
- let assemblyFormat = [{
- (`force` $force_import^)? $symbol `into` $symbol_table
- (`if` `undefined` $if_undefined^)? attr-dict
- `:` functional-type(operands, results)
- }];
- let cppNamespace = "mlir::iree_compiler::IREE::transform_dialect";
-
- let hasVerifier = 1;
-}
-
def MatchCastCompatibleDagFromRootOp : Op<Transform_Dialect, "iree.match.cast_compatible_dag_from_root",
[IsolatedFromAbove,
MatchOpInterface,
diff --git a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
index 1867e5e..c709e20 100644
--- a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
+++ b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
@@ -68,7 +68,7 @@
} // hal.executable.variant
} // hal.executable
- func.func @call_mul_abs_negate(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
+ util.func private @call_mul_abs_negate(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
%dim = tensor.dim %arg0, %c0 : tensor<?xf32>
%dim_i32 = arith.index_cast %dim : index to i32
@@ -79,12 +79,10 @@
#hal.interface.binding<0, 0>,
#hal.interface.binding<0, 1>,
#hal.interface.binding<0, 2>
- ],
- // HACK: keep the executable live through DCE. Only required when
- // using the automatic variant selection.
- hal.executable.ref = [@executable]
+ ]
} : (i32, tensor<?xf32>{%dim}, tensor<?xf32>{%dim}) -> tensor<?xf32>{%dim}
- return %0 : tensor<?xf32>
+
+ util.return %0 : tensor<?xf32>
}
transform.named_sequence @match_mul_abs_negate(%root: !transform.any_op {transform.readonly}) -> (!transform.any_value, !transform.any_value) {
@@ -137,10 +135,10 @@
transform.named_sequence @cast_and_call_dag(%ins: !transform.any_value {transform.readonly},
%out: !transform.any_value {transform.readonly}) {
%root = transform.get_defining_op %out : (!transform.any_value) -> !transform.any_op
- %module = transform.iree.get_nearest_symbol_table %root : (!transform.any_op) -> !transform.any_op
- %executable = transform.iree.import_symbol @executable into %module if undefined : (!transform.any_op) -> !transform.any_op
- %func = transform.iree.import_symbol @call_mul_abs_negate into %module if undefined : (!transform.any_op) -> !transform.any_op
- transform.func.cast_and_call %func(%ins) -> %out after %root {
+ %module = transform.util.get_nearest_symbol_table %root : (!transform.any_op) -> !transform.any_op
+ %executable = transform.util.import_symbol @executable into %module if undefined : (!transform.any_op) -> !transform.any_op
+ %func = transform.util.import_symbol @call_mul_abs_negate into %module if undefined : (!transform.any_op) -> !transform.any_op
+ transform.util.cast_and_call %func(%ins) -> %out after %root {
// This specifies how to resolve type mismatches between the arguments
// of the function and the inputs from the matcher. In this example,
// the only casts this will generate are same-rank tensor casts that
@@ -155,7 +153,7 @@
// add a new symbol to the module's symbol table.
transform.named_sequence @__transform_main(%module: !transform.any_op) {
// Gather the set of functions within the module.
- %funcs = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.any_op
+ %funcs = transform.structured.match ops{["util.func"]} in %module : (!transform.any_op) -> !transform.any_op
// For each function in the module, run the matcher on all contained
// operations.
transform.foreach %funcs : !transform.any_op {
diff --git a/samples/custom_dispatch/cpu/mlp_plugin/mlp_spec.mlir b/samples/custom_dispatch/cpu/mlp_plugin/mlp_spec.mlir
index 72a9860..eec83f7 100644
--- a/samples/custom_dispatch/cpu/mlp_plugin/mlp_spec.mlir
+++ b/samples/custom_dispatch/cpu/mlp_plugin/mlp_spec.mlir
@@ -1,4 +1,4 @@
-// Sample spec that matches an MLP example and forwards to
+// Sample spec that matches an MLP example and forwards to
// an implementation implemented by a system plugin.
// Is used along with samples/custom_dispatch/cpu/plugin/mlp.mlir
@@ -51,7 +51,7 @@
}
}
- func.func private @call_mlp(%lhs : tensor<?x?xf32>, %rhs : tensor<?x?xf32>, %init1 : tensor<?x?xf32>, %init2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ util.func private @call_mlp(%lhs : tensor<?x?xf32>, %rhs : tensor<?x?xf32>, %init1 : tensor<?x?xf32>, %init2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%m = tensor.dim %lhs, %c0 : tensor<?x?xf32>
@@ -61,17 +61,15 @@
%n_i32 = arith.index_cast %n : index to i32
%k_i32 = arith.index_cast %k : index to i32
- %mlp_result = flow.dispatch @executable::@x86_64::@mlp[](%lhs, %rhs, %m_i32, %n_i32, %k_i32) {
+ %mlp_result = flow.dispatch @executable::@x86_64::@mlp(%lhs, %rhs, %m_i32, %n_i32, %k_i32) {
hal.interface.bindings = [
#hal.interface.binding<0, 0>,
#hal.interface.binding<0, 1>,
#hal.interface.binding<0, 2>
- ],
- // HACK: keep the executable live through DCE. Only required when
- // using the automatic variant selection.
- hal.executable.ref = [@executable]
- } : (tensor<?x?xf32>{%m, %k}, tensor<?x?xf32>{%k, %n}, i32, i32, i32) -> tensor<?x?xf32>{%m, %n}
- return %mlp_result : tensor<?x?xf32>
+ ]
+ } : (tensor<?x?xf32>{%m, %k}, tensor<?x?xf32>{%k, %n}, i32, i32, i32) -> tensor<?x?xf32>{%m, %n}
+
+ util.return %mlp_result : tensor<?x?xf32>
}
transform.named_sequence @match_mlp(%root: !transform.any_op {transform.readonly}) -> (!transform.any_value, !transform.any_value) {
@@ -104,10 +102,10 @@
transform.named_sequence @cast_and_call_dag(%ins: !transform.any_value {transform.readonly},
%out: !transform.any_value {transform.readonly}) {
%root = transform.get_defining_op %out : (!transform.any_value) -> !transform.any_op
- %module = transform.iree.get_nearest_symbol_table %root : (!transform.any_op) -> !transform.any_op
- %executable = transform.iree.import_symbol @executable into %module if undefined : (!transform.any_op) -> !transform.any_op
- %func = transform.iree.import_symbol @call_mlp into %module if undefined : (!transform.any_op) -> !transform.any_op
- transform.func.cast_and_call %func(%ins) -> %out after %root {
+ %module = transform.util.get_nearest_symbol_table %root : (!transform.any_op) -> !transform.any_op
+ %executable = transform.util.import_symbol @executable into %module if undefined : (!transform.any_op) -> !transform.any_op
+ %func = transform.util.import_symbol @call_mlp into %module if undefined : (!transform.any_op) -> !transform.any_op
+ transform.util.cast_and_call %func(%ins) -> %out after %root {
// This specifies how to resolve type mismatches between the arguments
// of the function and the inputs from the matcher. In this example,
// the only casts this will generate are same-rank tensor casts that
@@ -122,7 +120,7 @@
// add a new symbol to the module's symbol table.
transform.named_sequence @__transform_main(%module: !transform.any_op) {
// Gather the set of functions within the module.
- %funcs = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.any_op
+ %funcs = transform.structured.match ops{["util.func"]} in %module : (!transform.any_op) -> !transform.any_op
// For each function in the module, run the matcher on all contained
// operations.
transform.foreach %funcs : !transform.any_op {
diff --git a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
index 065b795..08b977d 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
@@ -32,7 +32,7 @@
count(%device: !hal.device, %workload: index) -> (index, index, index) {
%c1_0 = arith.constant 1 : index
hal.return %c1_0, %c1_0, %c1_0 : index, index, index
- }
+ }
layout(#hal.pipeline.layout<push_constants = 1, sets = [
<0, bindings = [
<0, storage_buffer, ReadOnly>,
@@ -40,11 +40,11 @@
]>
]>)
bindings([
- #hal.interface.binding<0, 0>,
+ #hal.interface.binding<0, 0>,
#hal.interface.binding<0, 1>
- ])
+ ])
objects({
- #spirv_target ordinal(0) = [
+ #spirv_target ordinal(0) = [
#hal.executable.object<{
path = "samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.spv"
}>
@@ -78,8 +78,8 @@
transform.match.param.cmpi eq %n_inputs, %c1 : !transform.param<i64>
%n_outputs = transform.match.structured.num_inits %argmax : (!transform.any_op) -> !transform.param<i64>
transform.match.param.cmpi eq %n_outputs, %c2 : !transform.param<i64>
-
- transform.match.structured.yield %argmax : !transform.any_op
+
+ transform.match.structured.yield %argmax : !transform.any_op
}
// Verify the operand shapes of the linalg op. For example, in the below,
@@ -125,8 +125,8 @@
// custom kernel authored above, and replace the users of the argmax with a
// call to the function.
transform.named_sequence @cast_and_call_argmax(%argmax: !transform.any_op {transform.readonly}) {
- %module = transform.iree.get_nearest_symbol_table %argmax : (!transform.any_op) -> !transform.any_op
- %func = transform.iree.import_symbol @argmax_1d_f32_entry_point into %module : (!transform.any_op) -> !transform.any_op
+ %module = transform.util.get_nearest_symbol_table %argmax : (!transform.any_op) -> !transform.any_op
+ %func = transform.util.import_symbol @argmax_1d_f32_entry_point into %module : (!transform.any_op) -> !transform.any_op
%ins = transform.get_operand %argmax[0] : (!transform.any_op) -> !transform.any_value
%outs = transform.get_result %argmax[1] : (!transform.any_op) -> !transform.any_value
transform.func.cast_and_call %func(%ins) -> %outs before %argmax {
@@ -144,7 +144,7 @@
// add a new symbol to the module's symbol table.
transform.named_sequence @__transform_main(%module: !transform.any_op) {
// Gather the set of functions within the module.
- %funcs = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.any_op
+ %funcs = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.any_op
// For each function in the module, run the matcher on all contained
// operations.
transform.foreach %funcs : !transform.any_op {
diff --git a/tests/compiler_driver/inline_dynamic_hal_executable.mlir b/tests/compiler_driver/inline_dynamic_hal_executable.mlir
index 0fcf0e9..9c50d03 100644
--- a/tests/compiler_driver/inline_dynamic_hal_executable.mlir
+++ b/tests/compiler_driver/inline_dynamic_hal_executable.mlir
@@ -1,6 +1,5 @@
// RUN: iree-compile \
// RUN: --compile-to=hal \
-// RUN: --mlir-print-ir-after-all \
// RUN: --iree-execution-model=inline-dynamic \
// RUN: --iree-hal-target-backends=vmvx %s | FileCheck %s
@@ -9,7 +8,8 @@
return %0, %arg0 : tensor<4xf32>, tensor<4xf32>
}
-// Check the IR not registered as iree_hal_module_register_loader_types
+// Check that the IR isn't using types from the full HAL
+// (only those in iree_hal_module_register_loader_types).
// CHECK-NOT: hal.command_buffer
// CHECK-NOT: hal.allocator
// CHECK-NOT: hal.event
diff --git a/tests/compiler_driver/inline_static_hal_executable.mlir b/tests/compiler_driver/inline_static_hal_executable.mlir
index 55f0c59..2ac89e9 100644
--- a/tests/compiler_driver/inline_static_hal_executable.mlir
+++ b/tests/compiler_driver/inline_static_hal_executable.mlir
@@ -1,6 +1,5 @@
// RUN: iree-compile \
// RUN: --compile-to=hal \
-// RUN: --mlir-print-ir-after-all \
// RUN: --iree-execution-model=inline-static \
// RUN: --iree-hal-target-backends=vmvx-inline %s | FileCheck %s
@@ -9,7 +8,8 @@
return %0, %arg0 : tensor<4xf32>, tensor<4xf32>
}
-// Check the IR not registered as iree_hal_module_register_inline_types
+// Check that the IR isn't using types from the full HAL
+// (only those in iree_hal_module_register_inline_types).
// CHECK-NOT: hal.command_buffer
// CHECK-NOT: hal.allocator
// CHECK-NOT: hal.event
diff --git a/tests/compiler_driver/preprocessing_flags.mlir b/tests/compiler_driver/preprocessing_flags.mlir
index 8cd049f..6c4ad99 100644
--- a/tests/compiler_driver/preprocessing_flags.mlir
+++ b/tests/compiler_driver/preprocessing_flags.mlir
@@ -1,5 +1,5 @@
// RUN: iree-compile --iree-hal-target-backends=llvm-cpu --compile-to=preprocessing \
-// RUN: --iree-preprocessing-pass-pipeline="builtin.module(func.func(iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16}))" \
+// RUN: --iree-preprocessing-pass-pipeline="builtin.module(util.func(iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16}))" \
// RUN: --mlir-print-ir-after=iree-preprocessing-convert-conv2d-to-img2col --mlir-print-ir-after=iree-preprocessing-pad-linalg-ops %s 2>&1 \
// RUN: | FileCheck %s
@@ -8,11 +8,12 @@
outs(%arg2 : tensor<10x30xf32>) -> tensor<10x30xf32>
return %0 : tensor<10x30xf32>
}
+
// Just check that the pass runs, and that the compilation finishes
// CHECK: ConvertConv2DToImg2Col (iree-preprocessing-convert-conv2d-to-img2col)
// CHECK: PadLinalgOps (iree-preprocessing-pad-linalg-ops)
// CHECK-LABEL: module
-// CHECK-NEXT: func.func @test(
+// CHECK-NEXT: util.func public @test(
// CHECK-DAG: %[[ARG0:.+]] = hal.tensor.import %{{[a-zA-Z0-9]+}} "input0" : !hal.buffer_view -> tensor<10x20xf32>
// CHECK-DAG: %[[ARG1:.+]] = hal.tensor.import %{{[a-zA-Z0-9]+}} "input1" : !hal.buffer_view -> tensor<20x30xf32>
// CHECK-DAG: %[[ARG2:.+]] = hal.tensor.import %{{[a-zA-Z0-9]+}} "input2" : !hal.buffer_view -> tensor<10x30xf32>
diff --git a/tests/transform_dialect/cpu/BUILD.bazel b/tests/transform_dialect/cpu/BUILD.bazel
index 5249a7c..aabea9d 100644
--- a/tests/transform_dialect/cpu/BUILD.bazel
+++ b/tests/transform_dialect/cpu/BUILD.bazel
@@ -17,9 +17,11 @@
"attention.mlir",
"contraction-packing.mlir",
"contraction-packing-and-dispatch.mlir",
- "eltwise_reduction_eltwise.mlir",
+ # DISABLED: incorrectly assuming default flag values.
+ # "eltwise_reduction_eltwise.mlir",
"fold_tensor_slice_into_transfer.mlir",
- "matmul.mlir",
+ # DISABLED: incorrectly assuming default flag values.
+ # "matmul.mlir",
"matmul_library_call.mlir",
],
cfg = "//tests:lit.cfg.py",
diff --git a/tests/transform_dialect/cpu/CMakeLists.txt b/tests/transform_dialect/cpu/CMakeLists.txt
index 7ddd39f..4328bf4 100644
--- a/tests/transform_dialect/cpu/CMakeLists.txt
+++ b/tests/transform_dialect/cpu/CMakeLists.txt
@@ -17,9 +17,7 @@
"attention.mlir"
"contraction-packing-and-dispatch.mlir"
"contraction-packing.mlir"
- "eltwise_reduction_eltwise.mlir"
"fold_tensor_slice_into_transfer.mlir"
- "matmul.mlir"
"matmul_library_call.mlir"
TOOLS
${IREE_LLD_TARGET}
diff --git a/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir b/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
index 622db6a..bb0c56a 100644
--- a/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
+++ b/tests/transform_dialect/cpu/contraction-packing-and-dispatch.mlir
@@ -2,11 +2,9 @@
// Preprocessing with generalized packing.
//
// RUN: iree-opt %s --iree-transform-dialect-interpreter --transform-dialect-drop-schedule | \
-// RUN: iree-opt --iree-hal-target-backends=llvm-cpu \
-// RUN: --iree-abi-transformation-pipeline \
-// RUN: --iree-flow-transformation-pipeline \
-// RUN: --iree-stream-transformation-pipeline \
-// RUN: --iree-hal-configuration-pipeline | \
+// RUN: iree-compile - --iree-hal-target-backends=llvm-cpu \
+// RUN: --iree-opt-data-tiling=false \
+// RUN: --compile-to=executable-configurations | \
// RUN: FileCheck %s
!a_tensor_t = tensor<1234x567xf32>
@@ -16,10 +14,6 @@
// Note: the normalization in these maps is gone due to InterchangeGenericOps.
// When using generalized packing, it would be better to drop that pass.
-// CHECK-DAG: #[[$map_lhs:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d4, d2, d5)>
-// CHECK-DAG: #[[$map_rhs:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d1, d3, d5)>
-// CHECK-DAG: #[[$map_res:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
// CHECK-LABEL: func.func @matmul_dispatch_0
// CHECK: tensor.empty() : tensor<155x18x8x32xf32>
// CHECK: tensor.pack
@@ -33,11 +27,12 @@
// CHECK: tensor.pack
// CHECK-LABEL: func.func @matmul_dispatch_3
-func.func @matmul(%arg0: !a_tensor_t, %arg2: !c_tensor_t) -> !c_tensor_t {
+func.func public @matmul(%arg0: !a_tensor_t, %arg2: !c_tensor_t) -> !c_tensor_t {
%rhs = arith.constant dense<0.1> : !b_tensor_t
%c0 = util.optimization_barrier %rhs : !b_tensor_t
// CHECK-NOT: pack
- // CHECK: linalg.generic {indexing_maps = [#[[$map_lhs]], #[[$map_rhs]], #[[$map_res]]],
+ // CHECK: linalg.generic
+ // CHECK-SAME: indexing_maps = [affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d4, d2, d5)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d1, d3, d5)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
// CHECK-SAME: ins(%{{.*}} : tensor<155x18x8x32xf32>, tensor<18x56x16x32xf32>)
// CHECK-SAME: outs(%{{.*}} : tensor<155x56x8x16xf32>)
diff --git a/tests/transform_dialect/cpu/eltwise_reduction_eltwise.mlir b/tests/transform_dialect/cpu/eltwise_reduction_eltwise.mlir
index 60a1738..0e61ca7 100644
--- a/tests/transform_dialect/cpu/eltwise_reduction_eltwise.mlir
+++ b/tests/transform_dialect/cpu/eltwise_reduction_eltwise.mlir
@@ -43,19 +43,17 @@
return %8 : !out_tensor_t
}
-
-// RUN: iree-opt %s --iree-hal-target-backends=llvm-cpu \
-// RUN: --iree-abi-transformation-pipeline \
-// RUN: --iree-flow-transformation-pipeline \
-// RUN: --iree-stream-transformation-pipeline \
-// RUN: --iree-hal-configuration-pipeline | \
-// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-codegen-materialize-user-configs, iree-llvmcpu-select-lowering-strategy, iree-llvmcpu-lower-executable-target)))' \
+// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
+// RUN: --iree-opt-data-tiling=false \
+// RUN: --compile-to=executable-configurations | \
+// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-codegen-materialize-user-configs,iree-llvmcpu-select-lowering-strategy,iree-llvmcpu-lower-executable-target)))' \
// RUN: --iree-llvmcpu-enable-transform-dialect-jit | \
// RUN: FileCheck %s
// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
+// RUN: --iree-opt-data-tiling=false \
// RUN: --iree-llvmcpu-enable-transform-dialect-jit | \
-// RUN: iree-run-module --module=- --function=reduce --device=local-task --input="32x256xf32=1" |\
+// RUN: iree-run-module --module=- --function=reduce --device=local-task --input="32x256xf32=1" | \
// RUN: FileCheck %s --check-prefix=EXEC
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
diff --git a/tests/transform_dialect/cpu/matmul.mlir b/tests/transform_dialect/cpu/matmul.mlir
index 1af272f..246b712 100644
--- a/tests/transform_dialect/cpu/matmul.mlir
+++ b/tests/transform_dialect/cpu/matmul.mlir
@@ -1,4 +1,3 @@
-
!A_size = tensor<3x5xf32>
!B_size = tensor<5x3xf32>
!C_size = tensor<3x3xf32>
@@ -10,19 +9,17 @@
return %0 : !C_size
}
-// RUN: iree-opt %s --iree-hal-target-backends=llvm-cpu \
-// RUN: --iree-abi-transformation-pipeline \
-// RUN: --iree-flow-transformation-pipeline \
-// RUN: --iree-stream-transformation-pipeline \
-// RUN: --iree-hal-configuration-pipeline | \
-// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-codegen-materialize-user-configs, iree-llvmcpu-lower-executable-target)))' \
+// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
+// RUN: --iree-opt-data-tiling=false \
+// RUN: --compile-to=executable-configurations | \
+// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-codegen-materialize-user-configs,iree-llvmcpu-lower-executable-target)))' \
// RUN: --iree-codegen-transform-dialect-library=%p/matmul_codegen_default_spec.mlir \
// RUN: --iree-codegen-use-transform-dialect-strategy=codegen | \
// RUN: FileCheck %s --check-prefixes=CODEGEN-DEFAULT
// CODEGEN-DEFAULT: hal.executable.export public @matmul_static_dispatch_0_matmul_3x3x5
-// CODEGEN-DEFAULT: %[[C2:.+]] = arith.constant 2 : index
-// CODEGEN-DEFAULT: %[[C1:.+]] = arith.constant 1 : index
+// CODEGEN-DEFAULT-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CODEGEN-DEFAULT-DAG: %[[C2:.+]] = arith.constant 2 : index
// CODEGEN-DEFAULT: hal.return %[[C2]], %[[C1]], %[[C1]]
// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
diff --git a/tools/test/compile_to_phase.mlir b/tools/test/compile_to_phase.mlir
index 0e2b853..0390564 100644
--- a/tools/test/compile_to_phase.mlir
+++ b/tools/test/compile_to_phase.mlir
@@ -1,9 +1,9 @@
// RUN: iree-compile --compile-to=input %s | FileCheck %s --check-prefix=INPUT-PHASE
-// INPUT-PHASE: func.func @abs(%[[ARG0:.+]]: tensor<f32>)
+// INPUT-PHASE: util.func public @abs(%[[ARG0:.+]]: tensor<f32>)
// INPUT-PHASE: math.absf %[[ARG0]] : tensor<f32>
// RUN: iree-compile --compile-to=abi %s | FileCheck %s --check-prefix=ABI-PHASE
-// ABI-PHASE: func.func @abs(%[[ARG0:.+]]: !hal.buffer_view)
+// ABI-PHASE: util.func public @abs(%[[ARG0:.+]]: !hal.buffer_view)
// ABI-PHASE: %[[INPUT:.+]] = hal.tensor.import %[[ARG0]] "input0" : !hal.buffer_view -> tensor<f32>
// ABI-PHASE: math.absf %[[INPUT]] : tensor<f32>