Removing -iree-flow-experimental-dispatch-reduce and the reduction region ops.

PiperOrigin-RevId: 300161044
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index 7e02912..dcebd2c 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -354,277 +354,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// flow.reduction.region
-//===----------------------------------------------------------------------===//
-
-void ReductionRegionOp::build(Builder *builder, OperationState &state,
-                              ArrayRef<Type> resultTypes, Value workload,
-                              ValueRange operands, ValueRange initialValues,
-                              ArrayRef<int32_t> dimensions,
-                              ArrayRef<NamedAttribute> attributes) {
-  state.addTypes(resultTypes);
-  state.addOperands({workload});
-  state.addOperands(operands);
-  state.addOperands(initialValues);
-  state.addAttribute(
-      "dimensions",
-      DenseIntElementsAttr::get(
-          VectorType::get({static_cast<int32_t>(dimensions.size())},
-                          builder->getIntegerType(32)),
-          dimensions));
-  state.addAttributes(attributes);
-  state.addRegion();
-  state.addRegion();
-  state.setOperandListToResizable();
-}
-
-ParseResult parseReductionRegionOp(OpAsmParser &parser,
-                                   OperationState *result) {
-  OpAsmParser::OperandType workloadArg;
-  Type workloadArgType;
-  if (failed(parser.parseLSquare()) ||
-      failed(parser.parseOperand(workloadArg)) ||
-      failed(parser.parseColonType(workloadArgType)) ||
-      failed(parser.parseRSquare()) ||
-      failed(parser.resolveOperand(workloadArg, workloadArgType,
-                                   result->operands))) {
-    return failure();
-  }
-
-  if (failed(parser.parseLParen())) {
-    return failure();
-  }
-  SmallVector<OpAsmParser::OperandType, 8> regionArgs;
-  SmallVector<OpAsmParser::OperandType, 8> reductionOperands;
-  SmallVector<Type, 8> reductionOperandTypes;
-  auto operandsLoc = parser.getCurrentLocation();
-  do {
-    // Reserve entries in the lists.
-    regionArgs.emplace_back();
-    reductionOperands.emplace_back();
-    reductionOperandTypes.emplace_back();
-    if (failed(parser.parseRegionArgument(regionArgs.back())) ||
-        failed(parser.parseEqual()) ||
-        failed(parser.parseOperand(reductionOperands.back())) ||
-        failed(parser.parseColonType(reductionOperandTypes.back()))) {
-      return failure();
-    }
-  } while (succeeded(parser.parseOptionalComma()));
-  if (failed(parser.parseRParen()) ||
-      failed(parser.parseArrowTypeList(result->types)) ||
-      failed(parser.resolveOperands(reductionOperands, reductionOperandTypes,
-                                    operandsLoc, result->operands))) {
-    return failure();
-  }
-  result->setOperandListToResizable();
-
-  Region *dispatchRegion = result->addRegion();
-  if (failed(parser.parseRegion(*dispatchRegion, regionArgs,
-                                reductionOperandTypes))) {
-    return failure();
-  }
-
-  SmallVector<OpAsmParser::OperandType, 4> invocationRegionArgs;
-  SmallVector<Type, 4> invocationRegionArgTypes;
-  if (failed(parser.parseKeyword("invocation")) ||
-      failed(parser.parseLParen())) {
-    return failure();
-  }
-  do {
-    Type argType;
-    SmallVector<OpAsmParser::OperandType, 2> reductionRegionArgs;
-    if (failed(parser.parseLParen()) ||
-        failed(parser.parseOperandList(reductionRegionArgs, 2)) ||
-        failed(parser.parseRParen()) ||
-        failed(parser.parseColonType(argType))) {
-      return failure();
-    }
-    invocationRegionArgs.push_back(reductionRegionArgs[0]);
-    invocationRegionArgTypes.push_back(argType);
-    invocationRegionArgs.push_back(reductionRegionArgs[1]);
-    invocationRegionArgTypes.push_back(argType);
-  } while (succeeded(parser.parseOptionalComma()));
-  SmallVector<Type, 4> invocationResultTypes;
-  if (failed(parser.parseRParen()) ||
-      failed(parser.parseArrowTypeList(invocationResultTypes))) {
-    return failure();
-  }
-
-  // Parse invocation body.
-  Region *invocationRegion = result->addRegion();
-  if (failed(parser.parseRegion(*invocationRegion, invocationRegionArgs,
-                                invocationRegionArgTypes)) ||
-      failed(parser.parseOptionalAttrDict(result->attributes))) {
-    return failure();
-  }
-
-  return success();
-}
-
-void printReductionRegionOp(OpAsmPrinter &p, ReductionRegionOp op) {
-  p << op.getOperationName();
-
-  // Print the workload argument.
-  p << "[";
-  p.printOperand(op.workload());
-  p << " : ";
-  p.printType(op.workload().getType());
-  p << "]";
-
-  auto &dispatchBlock = op.dispatch().front();
-  p << "(";
-  interleaveComma(llvm::zip(dispatchBlock.getArguments(), op.operands()), p,
-                  [&](std::tuple<BlockArgument, Value> it) {
-                    p << std::get<0>(it) << " = " << std::get<1>(it);
-                    p << " : ";
-                    p << std::get<1>(it).getType();
-                  });
-  p << ", ";
-  interleaveComma(
-      llvm::zip(dispatchBlock.getArguments().slice(op.operands().size()),
-                op.initial_values()),
-      p, [&](std::tuple<BlockArgument, Value> it) {
-        p << std::get<0>(it) << " = " << std::get<1>(it);
-        p << " : ";
-        p << std::get<1>(it).getType();
-      });
-  p << ")";
-  if (op.getNumResults() > 0) {
-    p << " -> ";
-    if (op.getNumResults() > 1) p << "(";
-    interleaveComma(op.getResultTypes(), p);
-    if (op.getNumResults() > 1) p << ")";
-  }
-  p.printRegion(op.dispatch(), /*printEntryBlockArgs=*/false);
-
-  p << " invocation(";
-  auto invocationType = op.getInvocationType();
-  auto &entryBlock = op.invocation().getBlocks().front();
-  int regionArgIndex = 0;
-  interleaveComma(invocationType.getInputs(), p, [&](Type operandType) {
-    p << "(";
-    p.printOperand(entryBlock.getArgument(regionArgIndex++));
-    p << ", ";
-    p.printOperand(entryBlock.getArgument(regionArgIndex++));
-    p << ") : ";
-    p.printType(operandType);
-  });
-  p << ")";
-  p.printArrowTypeList(invocationType.getResults());
-  p.printRegion(op.invocation(), /*printEntryBlockArgs=*/false);
-
-  p.printOptionalAttrDict(op.getAttrs(),
-                          /*elidedAttrs=*/{});
-}
-
-FunctionType ReductionRegionOp::getInvocationType() {
-  return FunctionType::get(llvm::to_vector<4>(initial_values().getTypes()),
-                           llvm::to_vector<4>(initial_values().getTypes()),
-                           getContext());
-}
-
-//===----------------------------------------------------------------------===//
-// flow.windowed_reduction.region
-//===----------------------------------------------------------------------===//
-
-void WindowedReductionRegionOp::build(
-    Builder *builder, OperationState &state, ArrayRef<Type> resultTypes,
-    Value workload, ValueRange operands, ValueRange initialValues,
-    ArrayRef<int32_t> windowDimensions, ArrayRef<int32_t> windowStrides,
-    ArrayRef<int32_t> baseDilations, ArrayRef<int32_t> windowDilations,
-    PaddingMode paddingMode, ArrayRef<NamedAttribute> attributes) {
-  state.addTypes(resultTypes);
-  state.addOperands({workload});
-  state.addOperands(operands);
-  state.addOperands(initialValues);
-  state.addAttribute(
-      "window_dimensions",
-      DenseIntElementsAttr::get(
-          VectorType::get({static_cast<int32_t>(windowDimensions.size())},
-                          builder->getIntegerType(32)),
-          windowDimensions));
-  state.addAttribute(
-      "window_strides",
-      DenseIntElementsAttr::get(
-          VectorType::get({static_cast<int32_t>(windowStrides.size())},
-                          builder->getIntegerType(32)),
-          windowStrides));
-  state.addAttribute(
-      "base_dilations",
-      DenseIntElementsAttr::get(
-          VectorType::get({static_cast<int32_t>(baseDilations.size())},
-                          builder->getIntegerType(32)),
-          baseDilations));
-  state.addAttribute(
-      "window_dilations",
-      DenseIntElementsAttr::get(
-          VectorType::get({static_cast<int32_t>(windowDilations.size())},
-                          builder->getIntegerType(32)),
-          windowDilations));
-  state.addAttribute("padding_mode", builder->getI32IntegerAttr(
-                                         static_cast<int32_t>(paddingMode)));
-  state.addAttributes(attributes);
-  state.addRegion();
-  state.addRegion();
-  state.setOperandListToResizable();
-}
-
-ParseResult parseWindowedReductionRegionOp(OpAsmParser &parser,
-                                           OperationState *result) {
-  return parseReductionRegionOp(parser, result);
-}
-
-void printWindowedReductionRegionOp(OpAsmPrinter &p,
-                                    WindowedReductionRegionOp op) {
-  p << op.getOperationName();
-
-  // Print the workload argument.
-  p << "[";
-  p.printOperand(op.workload());
-  p << " : ";
-  p.printType(op.workload().getType());
-  p << "]";
-
-  p << "(";
-  p.printOperands(op.operands());
-  p << ", ";
-  p.printOperands(op.initial_values());
-  p << ")";
-  if (op.getNumResults() > 0) {
-    p << " -> ";
-    if (op.getNumResults() > 1) p << "(";
-    interleaveComma(op.getResultTypes(), p);
-    if (op.getNumResults() > 1) p << ")";
-  }
-  p.printRegion(op.dispatch(), /*printEntryBlockArgs=*/false);
-
-  p << " invocation(";
-  auto invocationType = op.getInvocationType();
-  auto &entryBlock = op.invocation().getBlocks().front();
-  int regionArgIndex = 0;
-  interleaveComma(invocationType.getInputs(), p, [&](Type operandType) {
-    p << "(";
-    p.printOperand(entryBlock.getArgument(regionArgIndex++));
-    p << ", ";
-    p.printOperand(entryBlock.getArgument(regionArgIndex++));
-    p << ") : ";
-    p.printType(operandType);
-  });
-  p << ")";
-  p.printArrowTypeList(invocationType.getResults());
-  p.printRegion(op.invocation(), /*printEntryBlockArgs=*/false);
-
-  p.printOptionalAttrDict(op.getAttrs(),
-                          /*elidedAttrs=*/{});
-}
-
-FunctionType WindowedReductionRegionOp::getInvocationType() {
-  return FunctionType::get(llvm::to_vector<4>(initial_values().getTypes()),
-                           llvm::to_vector<4>(initial_values().getTypes()),
-                           getContext());
-}
-
-//===----------------------------------------------------------------------===//
 // flow.executable
 //===----------------------------------------------------------------------===//
 
@@ -714,75 +443,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// flow.reduction.entry / flow.windowed_reduction.entry
-//===----------------------------------------------------------------------===//
-
-static ParseResult parseReductionEntryOp(OpAsmParser &parser,
-                                         OperationState *result) {
-  FlatSymbolRefAttr functionRefAttr;
-  FlatSymbolRefAttr applyRefAttr;
-  if (failed(parser.parseAttribute(functionRefAttr, "function_ref",
-                                   result->attributes)) ||
-      failed(parser.parseKeyword("apply")) || failed(parser.parseLParen()) ||
-      failed(parser.parseAttribute(applyRefAttr, "apply_ref",
-                                   result->attributes)) ||
-      failed(parser.parseRParen())) {
-    return failure();
-  }
-
-  if (succeeded(parser.parseOptionalKeyword("as"))) {
-    StringAttr exportNameAttr;
-    if (failed(parser.parseLParen()) ||
-        failed(parser.parseAttribute(exportNameAttr, "sym_name",
-                                     result->attributes)) ||
-        failed(parser.parseRParen())) {
-      return failure();
-    }
-  } else {
-    result->addAttribute("sym_name", parser.getBuilder().getStringAttr(
-                                         functionRefAttr.getValue()));
-  }
-
-  if (failed(parser.parseOptionalAttrDictWithKeyword(result->attributes))) {
-    return failure();
-  }
-
-  return success();
-}
-
-static void printReductionEntryOp(OpAsmPrinter &p, ReductionEntryOp op) {
-  p << op.getOperationName() << ' ';
-  p.printSymbolName(op.function_ref());
-  p << " apply(";
-  p.printSymbolName(op.apply_ref());
-  p << ")";
-  if (op.sym_name() != op.function_ref()) {
-    p << " as(\"" << op.sym_name() << "\")";
-  }
-  p.printOptionalAttrDictWithKeyword(
-      op.getAttrs(), /*elidedAttrs=*/{"apply_ref", "function_ref", "sym_name"});
-}
-
-static ParseResult parseWindowedReductionEntryOp(OpAsmParser &parser,
-                                                 OperationState *result) {
-  return parseReductionEntryOp(parser, result);
-}
-
-static void printWindowedReductionEntryOp(OpAsmPrinter &p,
-                                          WindowedReductionEntryOp op) {
-  p << op.getOperationName() << ' ';
-  p.printSymbolName(op.function_ref());
-  p << " apply(";
-  p.printSymbolName(op.apply_ref());
-  p << ")";
-  if (op.sym_name() != op.function_ref()) {
-    p << " as(\"" << op.sym_name() << "\")";
-  }
-  p.printOptionalAttrDictWithKeyword(
-      op.getAttrs(), /*elidedAttrs=*/{"apply_ref", "function_ref", "sym_name"});
-}
-
-//===----------------------------------------------------------------------===//
 // flow.dispatch
 //===----------------------------------------------------------------------===//
 
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td
index 452d0c4..23a5616 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -204,119 +204,6 @@
   ];
 }
 
-def FLOW_ReductionRegionOp : FLOW_PureOp<"reduction.region", [
-    IsolatedFromAbove,
-    SameVariadicOperandSize,
-    // TODO(benvanik): verify operands and initial values have the same element
-    // types (but NOT the same shapes).
-  ]> {
-  let summary = [{partitioned reduction region}];
-  let description = [{
-    A closure that defines a reduction operation over one or more inputs.
-    Reductions are dispatches with very specific semantics around the indexing
-    of work. Partitioning first isolates reduction regions prior to dispatch
-    regions so that such semantics can be identified for folding.
-
-    The `dispatch` region contains the original reduction operation and the
-    `invocation` region contains the outlined per-invocation operation.
-
-    This operation follows the XLA Reduce semantics:
-    https://www.tensorflow.org/xla/operation_semantics#reduce
-  }];
-
-  let arguments = (ins
-    FLOW_Workload:$workload,
-    Variadic<AnyType>:$operands,
-    Variadic<AnyType>:$initial_values,
-    // TODO(benvanik): use index types instead of i32.
-    OptionalAttr<I32ElementsAttr>:$dimensions
-  );
-  let results = (outs
-    Variadic<AnyType>:$results
-  );
-
-  let regions = (region
-    AnyRegion:$dispatch,
-    AnyRegion:$invocation
-  );
-
-  let extraClassDeclaration = [{
-    unsigned getNumReductionOperands() {
-      return std::distance(operands().begin(), operands().end());
-    }
-    FunctionType getInvocationType();
-  }];
-
-  let skipDefaultBuilders = 1;
-  let builders = [
-    OpBuilder<[{
-      Builder *builder, OperationState &state, ArrayRef<Type> resultTypes,
-      Value workload, ValueRange operands,
-      ValueRange initialValues, ArrayRef<int32_t> dimensions,
-      ArrayRef<NamedAttribute> attributes = {}
-    }]>,
-  ];
-}
-
-def FLOW_WindowedReductionRegionOp : FLOW_PureOp<"windowed_reduction.region", [
-    SameVariadicOperandSize,
-    // TODO(benvanik): verify operands and initial values have the same element
-    // types (but NOT the same shapes).
-  ]> {
-  let summary = [{partitioned reduction region}];
-  let description = [{
-    A closure that defines a reduction operation over one or more inputs.
-    Reductions are dispatches with very specific semantics around the indexing
-    of work. Partitioning first isolates reduction regions prior to dispatch
-    regions so that such semantics can be identified for folding.
-
-    The `dispatch` region contains the original reduction operation and the
-    `invocation` region contains the outlined per-invocation operation.
-
-    This operation follows the XLA ReduceWindow semantics:
-    https://www.tensorflow.org/xla/operation_semantics#reducewindow
-  }];
-
-  let arguments = (ins
-    FLOW_Workload:$workload,
-    Variadic<FLOW_Tensor>:$operands,
-    Variadic<FLOW_Tensor>:$initial_values,
-    // TODO(benvanik): use index types instead of i32.
-    I32ElementsAttr:$window_dimensions,
-    I32ElementsAttr:$window_strides,
-    I32ElementsAttr:$base_dilations,
-    I32ElementsAttr:$window_dilations,
-    FLOW_PaddingModeAttr:$padding_mode
-  );
-  let results = (outs
-    Variadic<FLOW_Tensor>:$results
-  );
-
-  let regions = (region
-    AnyRegion:$dispatch,
-    AnyRegion:$invocation
-  );
-
-  let extraClassDeclaration = [{
-    unsigned getNumReductionOperands() {
-      return std::distance(operands().begin(), operands().end());
-    }
-    FunctionType getInvocationType();
-  }];
-
-  let skipDefaultBuilders = 1;
-  let builders = [
-    OpBuilder<[{
-      Builder *builder, OperationState &state, ArrayRef<Type> resultTypes,
-      Value workload, ValueRange operands,
-      ValueRange initialValues, ArrayRef<int32_t> windowDimensions,
-      ArrayRef<int32_t> windowStrides, ArrayRef<int32_t> baseDilations,
-      ArrayRef<int32_t> windowDilations, PaddingMode paddingMode,
-      ArrayRef<NamedAttribute> attributes = {}
-    }]>,
-  ];
-}
-
 def FLOW_ReturnOp : FLOW_Op<"return", [Terminator]> {
   let summary = [{return from a flow.dispatch_region}];
   let description = [{
@@ -407,60 +294,6 @@
   );
 }
 
-def FLOW_ReductionEntryOp : FLOW_Op<"reduction.entry", [
-    HasParent<"IREE::Flow::ExecutableOp">,
-    Symbol,
-  ]> {
-  let summary = [{defines an executable entry point for reduction operations}];
-  let description = [{
-    Specifies an exported function with an externally-visible alias. Multiple
-    exports can reference the same internal function. The computation represents
-    a reduction operation that has additional backend-specific semantics that
-    need to be lowered.
-
-    This operation follows the XLA Reduce semantics:
-    https://www.tensorflow.org/xla/operation_semantics#reduce
-  }];
-
-  // TODO(benvanik): add a list of all used workloads.
-  let arguments = (ins
-    // TODO(benvanik): ref into child module.
-    StrAttr:$sym_name,
-    FlatSymbolRefAttr:$function_ref,
-    FlatSymbolRefAttr:$apply_ref,
-    I32Attr:$dimension
-  );
-}
-
-def FLOW_WindowedReductionEntryOp : FLOW_Op<"windowed_reduction.entry", [
-    HasParent<"IREE::Flow::ExecutableOp">,
-    Symbol,
-  ]> {
-  let summary = [{defines an executable entry point for reduction operations}];
-  let description = [{
-    Specifies an exported function with an externally-visible alias. Multiple
-    exports can reference the same internal function. The computation represents
-    a reduction operation that has additional backend-specific semantics that
-    need to be lowered.
-
-    This operation follows the XLA ReduceWindow semantics:
-    https://www.tensorflow.org/xla/operation_semantics#reducewindow
-  }];
-
-  // TODO(benvanik): add a list of all used workloads.
-  let arguments = (ins
-    // TODO(benvanik): ref into child module.
-    StrAttr:$sym_name,
-    FlatSymbolRefAttr:$function_ref,
-    FlatSymbolRefAttr:$apply_ref,
-    I32Attr:$window_dimension,
-    I32Attr:$window_stride,
-    I32Attr:$base_dilation,
-    I32Attr:$window_dilation,
-    FLOW_PaddingModeAttr:$padding
-  );
-}
-
 //===----------------------------------------------------------------------===//
 // Dispatch ops
 //===----------------------------------------------------------------------===//
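For readers without the XLA reference handy: the semantics the removed ops encoded amount to an initialized fold over the reduced dimensions. A minimal, self-contained sketch in plain C++ (illustrative only, not IREE code; the 4x8 shape, add combinator, and zero initial value are taken from the deleted tests below):

#include <array>
#include <cstdio>

int main() {
  std::array<std::array<float, 8>, 4> input{};  // zeros; imagine real data
  const float initialValue = 0.0f;              // the $initial_values operand
  std::array<float, 4> result{};
  for (int i = 0; i < 4; ++i) {    // kept (non-reduced) dimension 0
    float acc = initialValue;
    for (int j = 0; j < 8; ++j) {  // reduced dimension 1
      acc += input[i][j];          // the invocation body: xla_hlo.add
    }
    result[i] = acc;
  }
  std::printf("result[0] = %f\n", result[0]);
  return 0;
}

This is exactly the tensor<4x8xf32> -> tensor<4xf32> shape transition the deleted reduction tests check for.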
diff --git a/iree/compiler/Dialect/Flow/IR/test/executable_ops.mlir b/iree/compiler/Dialect/Flow/IR/test/executable_ops.mlir
index 94d7908..4494dff 100644
--- a/iree/compiler/Dialect/Flow/IR/test/executable_ops.mlir
+++ b/iree/compiler/Dialect/Flow/IR/test/executable_ops.mlir
@@ -16,24 +16,3 @@
   // CHECK: flow.dispatch.entry @dispatch0 as("dispatch0_alias")
   flow.dispatch.entry @dispatch0 as("dispatch0_alias")
 }
-
-// -----
-
-// CHECK-LABEL: @reduction_ex
-flow.executable @reduction_ex {
-  // CHECK: module {
-  module {
-    // CHECK: @entry
-    func @entry(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-    // CHECK: @apply
-    func @apply(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-      %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-      return %0 : tensor<f32>
-    }
-  }
-  // CHECK: flow.reduction.entry @entry
-  // CHECK-SAME: apply(@apply)
-  // CHECK-SAME: as("entry_alias")
-  // CHECK-SAME: attributes {dimension = 1 : i32}
-  flow.reduction.entry @entry apply(@apply) as("entry_alias") attributes {dimension = 1 : i32}
-}
diff --git a/iree/compiler/Dialect/Flow/IR/test/reduction_regions.mlir b/iree/compiler/Dialect/Flow/IR/test/reduction_regions.mlir
deleted file mode 100644
index 12ea0e6..0000000
--- a/iree/compiler/Dialect/Flow/IR/test/reduction_regions.mlir
+++ /dev/null
@@ -1,68 +0,0 @@
-// Tests printing and parsing of reduction region ops.
-
-// RUN: iree-opt -split-input-file %s | iree-opt -split-input-file | IreeFileCheck %s
-
-// CHECK-LABEL: @singleReduction
-func @singleReduction(%arg0 : tensor<5x1xf32>) {
-  // CHECK: [[WORKLOAD:%.+]] = "some.shape"(%arg0) : (tensor<5x1xf32>) -> vector<3xi32>
-  %workload = "some.shape"(%arg0) : (tensor<5x1xf32>) -> vector<3xi32>
-  // CHECK: [[INITIALF:%.+]] = "some.constant"() : () -> tensor<f32>
-  %initialValueF = "some.constant"() : () -> tensor<f32>
-  // CHECK: = flow.reduction.region[
-  // CHECK-SAME: [[WORKLOAD]] : vector<3xi32>
-  // CHECK-SAME: ](
-  // CHECK-SAME: %arg1 = %arg0 : tensor<5x1xf32>, %arg2 = [[INITIALF]] : tensor<f32>) -> tensor<1xf32> {
-  // CHECK-NEXT: "xla_hlo.reduce"(%arg1, %arg2) ( {
-  // CHECK: } invocation((%arg1, %arg2) : tensor<f32>) -> tensor<f32> {
-  // CHECK-NEXT: = xla_hlo.add %arg1, %arg2 : tensor<f32>
-  // CHECK-NEXT: flow.return %3 : tensor<f32>
-  // CHECK-NEXT: } {dimensions = dense<1> : vector<1xi32>}
-  %0 = flow.reduction.region[%workload : vector<3xi32>](%arg1 = %arg0 : tensor<5x1xf32>, %arg2 = %initialValueF : tensor<f32>) -> tensor<1xf32> {
-    %1 = "xla_hlo.reduce"(%arg1, %arg2) ( {
-    ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):  // no predecessors
-      %2 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%2) : (tensor<f32>) -> ()
-    }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1xf32>, tensor<f32>) -> tensor<1xf32>
-    flow.return %1 : tensor<1xf32>
-  } invocation((%arg1, %arg2) : tensor<f32>) -> tensor<f32> {
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    flow.return %1 : tensor<f32>
-  } {dimensions = dense<1> : vector<1xi32>}
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @fusedReduction
-func @fusedReduction(%arg0 : tensor<4x8xf32>, %arg1 : tensor<4x8xi32>) {
-  // CHECK: [[WORKLOAD:%.+]] = "some.shape"(%arg0) : (tensor<4x8xf32>) -> vector<3xi32>
-  %workload = "some.shape"(%arg0) : (tensor<4x8xf32>) -> vector<3xi32>
-  // CHECK: [[INITIALF:%.+]] = "some.constant"() : () -> tensor<f32>
-  // CHECK: [[INITIALI:%.+]] = "some.constant"() : () -> tensor<i32>
-  %initialValueF = "some.constant"() : () -> tensor<f32>
-  %initialValueI = "some.constant"() : () -> tensor<i32>
-  // CHECK: = flow.reduction.region[
-  // CHECK-SAME: [[WORKLOAD]] : vector<3xi32>
-  // CHECK-SAME: ](
-  // CHECK-SAME: %arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xi32>, %arg4 = [[INITIALF]] : tensor<f32>, %arg5 = [[INITIALI]] : tensor<i32>) -> (tensor<4xf32>, tensor<4xi32>) {
-  // CHECK-NEXT: "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5) ( {
-  // CHECK: } invocation((%arg2, %arg3) : tensor<f32>, (%arg4, %arg5) : tensor<i32>) -> (tensor<f32>, tensor<i32>) {
-  // CHECK-NEXT: = xla_hlo.add %arg2, %arg3 : tensor<f32>
-  // CHECK-NEXT: = xla_hlo.add %arg4, %arg5 : tensor<i32>
-  // CHECK-NEXT: flow.return %4, %5 : tensor<f32>, tensor<i32>
-  // CHECK-NEXT: } {dimensions = dense<1> : vector<1xi32>}
-  %0:2 = flow.reduction.region[%workload : vector<3xi32>](%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xi32>, %arg4 = %initialValueF : tensor<f32>, %arg5 = %initialValueI : tensor<i32>) -> (tensor<4xf32>, tensor<4xi32>) {
-    %1:2 = "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5) ( {
-    ^bb0(%arg6: tensor<f32>, %arg7: tensor<i32>, %arg8: tensor<f32>, %arg9: tensor<i32>):  // no predecessors
-      %2 = xla_hlo.add %arg6, %arg8 : tensor<f32>
-      %3 = xla_hlo.add %arg7, %arg9 : tensor<i32>
-      "xla_hlo.return"(%2, %3) : (tensor<f32>, tensor<i32>) -> ()
-    }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xi32>, tensor<f32>, tensor<i32>) -> (tensor<4xf32>, tensor<4xi32>)
-    flow.return %1#0, %1#1 : tensor<4xf32>, tensor<4xi32>
-  } invocation((%arg2, %arg3) : tensor<f32>, (%arg4, %arg5) : tensor<i32>) -> (tensor<f32>, tensor<i32>) {
-    %1 = xla_hlo.add %arg2, %arg3 : tensor<f32>
-    %2 = xla_hlo.add %arg4, %arg5 : tensor<i32>
-    flow.return %1, %2 : tensor<f32>, tensor<i32>
-  } {dimensions = dense<1> : vector<1xi32>}
-  return
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/BUILD b/iree/compiler/Dialect/Flow/Transforms/BUILD
index 2daf940..d4982ef 100644
--- a/iree/compiler/Dialect/Flow/Transforms/BUILD
+++ b/iree/compiler/Dialect/Flow/Transforms/BUILD
@@ -26,12 +26,10 @@
         "FoldCompatibleDispatchRegions.cpp",
         "FormStreams.cpp",
         "IdentifyDispatchRegions.cpp",
-        "IdentifyReductionRegions.cpp",
        "LegalizeInputTypes.cpp",
         "MaterializeExportedReflection.cpp",
         "MergeExportedReflection.cpp",
         "OutlineDispatchRegions.cpp",
-        "OutlineReductionRegions.cpp",
         "Passes.cpp",
         "PrePostPartitioningConversion.cpp",
         "RematerializeDispatchConstants.cpp",
diff --git a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
index 406aea1..5c47dd9 100644
--- a/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
+++ b/iree/compiler/Dialect/Flow/Transforms/CMakeLists.txt
@@ -26,12 +26,10 @@
     "FoldCompatibleDispatchRegions.cpp"
     "FormStreams.cpp"
     "IdentifyDispatchRegions.cpp"
-    "IdentifyReductionRegions.cpp"
     "LegalizeInputTypes.cpp"
     "MaterializeExportedReflection.cpp"
     "MergeExportedReflection.cpp"
     "OutlineDispatchRegions.cpp"
-    "OutlineReductionRegions.cpp"
     "Passes.cpp"
     "PrePostPartitioningConversion.cpp"
     "RematerializeDispatchConstants.cpp"
diff --git a/iree/compiler/Dialect/Flow/Transforms/IdentifyReductionRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/IdentifyReductionRegions.cpp
deleted file mode 100644
index 41d35fe..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/IdentifyReductionRegions.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <algorithm>
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Flow/Utils/WorkloadUtils.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/LogicalResult.h"
-#include "mlir/Transforms/Utils.h"
-#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-// Builds a new reduction region with the given |invocationRegion|.
-// The new region will be inserted after |originalOp|.
-//
-// All |invocationRegion| ops must be compatible with the |workload| specified
-// as they will all be dispatched with the same workgroup structure. The
-// |invocationRegion| will not be modified.
-LogicalResult buildReductionRegion(Operation *dispatchOp,
-                                   ArrayRef<Value> operands,
-                                   ArrayRef<Value> initialValues,
-                                   ArrayRef<int32_t> dimensions,
-                                   Region &invocationRegion) {
-  OpBuilder parentBuilder(dispatchOp);
-
-  // Compute the workload based on the output shape.
-  // When variadic all output shapes match so we can just take the first.
-  auto workload = calculateWorkload(dispatchOp, dispatchOp->getResult(0));
-
-  // Build the region op and add it to the parent block.
-  auto reductionRegionOp = parentBuilder.create<ReductionRegionOp>(
-      dispatchOp->getLoc(), dispatchOp->getResultTypes(), workload, operands,
-      initialValues, dimensions);
-
-  // Clone the dispatch op (xla_hlo.reduce, etc) into the dispatch region. This
-  // way we preserve the original op all the way through the pipeline while
-  // still exposing it with standardized attributes for the later scheduler
-  // passes.
-  OpBuilder dispatchBuilder(dispatchOp->getContext());
-  auto *dispatchBlock =
-      dispatchBuilder.createBlock(&reductionRegionOp.dispatch());
-  BlockAndValueMapping dispatchMapping;
-  for (auto operand : operands) {
-    dispatchMapping.map(operand, dispatchBlock->addArgument(operand.getType()));
-  }
-  for (auto initialValue : initialValues) {
-    dispatchMapping.map(initialValue,
-                        dispatchBlock->addArgument(initialValue.getType()));
-  }
-  auto *clonedOp = dispatchBuilder.clone(*dispatchOp, dispatchMapping);
-  dispatchBuilder.create<ReturnOp>(dispatchOp->getLoc(),
-                                   clonedOp->getResults());
-
-  // Create the block and setup the arg mapping for captured values.
-  BlockAndValueMapping invocationMapping;
-  invocationRegion.cloneInto(&reductionRegionOp.invocation(),
-                             invocationMapping);
-
-  // Replace xla_hlo.return -> flow.return.
-  OpBuilder regionBuilder(reductionRegionOp.invocation());
-  reductionRegionOp.invocation().walk([&](xla_hlo::ReturnOp returnOp) {
-    regionBuilder.setInsertionPoint(returnOp);
-    regionBuilder.create<ReturnOp>(returnOp.getLoc(), returnOp.getOperands());
-    returnOp.erase();
-  });
-
-  // Replace usage of values with the results of the region.
-  for (int i = 0; i < dispatchOp->getNumResults(); ++i) {
-    dispatchOp->getResult(i).replaceAllUsesWith(reductionRegionOp.getResult(i));
-  }
-
-  return success();
-}
-
-// Converts an xla_hlo::ReduceOp to a reduction region and inlines the target
-// computation into the region body.
-LogicalResult buildReductionRegionFromXLAReduceOp(xla_hlo::ReduceOp reduceOp) {
-  SmallVector<Value, 4> operands(reduceOp.getOperands());
-  OperandAdaptor<xla_hlo::ReduceOp> adaptor(operands);
-
-  SmallVector<int32_t, 4> dimensions;
-  for (auto dim : reduceOp.dimensions().getIntValues()) {
-    dimensions.push_back(dim.getSExtValue());
-  }
-
-  // Create the reduction region op with the reduction computation.
-  if (failed(buildReductionRegion(reduceOp, adaptor.operands(),
-                                  adaptor.init_values(), dimensions,
-                                  reduceOp.body()))) {
-    return failure();
-  }
-
-  // Remove original XLA reduction op.
-  reduceOp.erase();
-
-  return success();
-}
-
-// Identifies reduction ops and moves them into reduction regions.
-LogicalResult identifyBlockReductionRegions(FuncOp funcOp, Block *block) {
-  // Fixed point iteration until we can no longer fuse anything.
-  bool didFindAnyNewRegions;
-  do {
-    // Iterate in reverse so we root further along in the op list.
-    didFindAnyNewRegions = false;
-    for (auto &rootOp : llvm::reverse(*block)) {
-      if (auto reduceOp = dyn_cast<xla_hlo::ReduceOp>(rootOp)) {
-        if (failed(buildReductionRegionFromXLAReduceOp(reduceOp))) {
-          return failure();
-        }
-
-        // Successfully created a dispatch region from the ops and we must now
-        // start over again as we've likely trashed the whole block structure.
-        didFindAnyNewRegions = true;
-        break;
-      }
-    }
-  } while (didFindAnyNewRegions);
-  return success();
-}
-
-}  // namespace
-
-// Identifies reduction ops and moves their targets into reduction regions.
-class IdentifyReductionRegionsPass
-    : public ModulePass<IdentifyReductionRegionsPass> {
- public:
-  void runOnModule() override {
-    for (auto funcOp : getModule().getOps<FuncOp>()) {
-      for (auto &block : funcOp) {
-        if (failed(identifyBlockReductionRegions(funcOp, &block))) {
-          return signalPassFailure();
-        }
-      }
-    }
-  }
-};
-
-std::unique_ptr<OpPassBase<ModuleOp>> createIdentifyReductionRegionsPass() {
-  return std::make_unique<IdentifyReductionRegionsPass>();  // NOLINT
-}
-
-static PassRegistration<IdentifyReductionRegionsPass> pass(
-    "iree-flow-identify-reduction-regions",
-    "Identifies reduction regions based on input reduction ops");
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
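The pass deleted above drove its rewrites with a restart-on-rewrite fixed-point loop rather than a pattern rewriter. A minimal, self-contained sketch of that driver pattern in plain C++ (hypothetical stand-in types, not MLIR/IREE code):

#include <list>

// Stand-ins for Operation/Block (hypothetical; not MLIR types).
struct Op {
  bool isReduce = false;
};
using Block = std::list<Op>;

// Scan in reverse, rewrite the first match, and report whether anything
// changed. The caller restarts the scan after every rewrite because the
// rewrite may have restructured the block and invalidated iterators.
static bool rewriteOneReduce(Block &block) {
  for (auto it = block.rbegin(); it != block.rend(); ++it) {
    if (it->isReduce) {
      it->isReduce = false;  // stand-in for "wrap the op in a reduction region"
      return true;
    }
  }
  return false;
}

static void runToFixedPoint(Block &block) {
  while (rewriteOneReduce(block)) {
    // Keep going until a full pass finds nothing left to rewrite.
  }
}

Scanning in reverse roots the region as far down the block as possible, which is why the deleted pass restarted from scratch after each successful rewrite.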
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineReductionRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineReductionRegions.cpp
deleted file mode 100644
index bee1284..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineReductionRegions.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <utility>
-
-#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/Flow/Utils/DispatchUtils.h"
-#include "iree/compiler/Dialect/Flow/Utils/WorkloadUtils.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-namespace iree_compiler {
-namespace IREE {
-namespace Flow {
-
-namespace {
-
-// Determines the shapes involved with reducing this dimension.
-SmallVector<int64_t, 4> calculateResultShape(Value input, int windowDimension) {
-  SmallVector<int64_t, 4> resultShape;
-  for (auto it :
-       llvm::enumerate(input.getType().cast<ShapedType>().getShape())) {
-    if (it.index() != windowDimension) {
-      resultShape.push_back(it.value());
-    }
-  }
-  return resultShape;
-}
-
-// Converts a reduction_region into a dispatch to the outlined region function
-// for a single reduction dimension.
-// Returns the results of the reduction or empty if the construction fails.
-SmallVector<Value, 4> convertToDispatchOp(
-    Operation *regionOp, ExecutableOp executableOp, StringRef entryPointName,
-    int reductionDimension, SmallVector<Value, 4> initialValues,
-    SmallVector<Value, 4> inputs, OpBuilder &dispatcherBuilder) {
-  SmallVector<Type, 4> resultTypes;
-  for (auto resultType : llvm::enumerate(regionOp->getResultTypes())) {
-    // Allocate output buffer in the dispatcher to pass in to the region.
-    auto shapedType = resultType.value().cast<ShapedType>();
-    auto reducedType = RankedTensorType::get(
-        calculateResultShape(inputs[resultType.index()], reductionDimension),
-        shapedType.getElementType());
-    resultTypes.push_back(reducedType);
-  }
-
-  // Calculate workload from the result shape.
-  auto workload = calculateWorkload(regionOp, regionOp->getResult(0));
-
-  // Create the reduce op to the executable function.
-  std::vector<Value> allOperands;
-  allOperands.insert(allOperands.end(), inputs.begin(), inputs.end());
-  allOperands.insert(allOperands.end(), initialValues.begin(),
-                     initialValues.end());
-  auto dispatchOp = dispatcherBuilder.create<DispatchOp>(
-      regionOp->getLoc(), executableOp.getName(), entryPointName, workload,
-      resultTypes, allOperands);
-
-  return dispatchOp.getResults();
-}
-
-// Creates an executable that holds the given elemental reduction region.
-// The executable will have an entry point taking the specified reduction values
-// and writing the results to output arguments.
-std::pair<ExecutableOp, ReductionEntryOp> createReductionExecutable(
-    ReductionRegionOp regionOp, int outlinedRegionOrdinal,
-    int separatedReductionIndex, int reductionDimension,
-    SmallVector<Value, 4> initialValues, SmallVector<Value, 4> inputs,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  // Create a new entry point that we can use with the signature for this
-  // single dimension we are operating on.
-  SmallVector<Type, 8> allOperandTypes;
-  auto inputTypes =
-      llvm::map_range(inputs, [](Value value) { return value.getType(); });
-  allOperandTypes.append(inputTypes.begin(), inputTypes.end());
-  auto initialValueTypes = llvm::map_range(
-      initialValues, [](Value value) { return value.getType(); });
-  allOperandTypes.append(initialValueTypes.begin(), initialValueTypes.end());
-  SmallVector<Type, 4> resultTypes;
-  for (auto resultType : llvm::enumerate(regionOp.getResultTypes())) {
-    auto shapedType = resultType.value().cast<ShapedType>();
-    auto reducedType = RankedTensorType::get(
-        calculateResultShape(inputs[resultType.index()], reductionDimension),
-        shapedType.getElementType());
-    resultTypes.push_back(reducedType);
-  }
-  auto dispatchFuncType =
-      FunctionType::get(allOperandTypes, resultTypes, regionOp.getContext());
-
-  // Create the dispatch and invocation functions.
-  auto parentFuncOp = regionOp.getParentOfType<FuncOp>();
-  std::string namePrefix = parentFuncOp.getName().str() + "_reduce_" +
-                           std::to_string(outlinedRegionOrdinal) + "_dim_" +
-                           std::to_string(separatedReductionIndex);
-  std::string dispatchFuncName = namePrefix + "_dispatch";
-  auto dispatchFuncOp =
-      FuncOp::create(regionOp.getLoc(), dispatchFuncName, dispatchFuncType);
-  std::string invocationFuncName = namePrefix + "_invocation";
-  auto invocationFuncOp = createRegionFunction(
-      regionOp.getLoc(), invocationFuncName, regionOp.invocation());
-
-  // Create the executable with the region cloned into it.
-  auto executableOp = createExecutable(
-      regionOp.getLoc(), namePrefix, {dispatchFuncOp, invocationFuncOp},
-      parentFuncOp.getParentOfType<ModuleOp>(), dispatchableFuncOps);
-  executableOp.getOperation()->moveBefore(parentFuncOp);
-
-  // Add dispatch export pointing at the function.
-  OpBuilder builder(executableOp.body());
-  auto entryPointOp = builder.create<ReductionEntryOp>(
-      regionOp.getLoc(), builder.getStringAttr(dispatchFuncOp.getName()),
-      builder.getSymbolRefAttr(dispatchFuncOp),
-      builder.getSymbolRefAttr(invocationFuncOp),
-      builder.getI32IntegerAttr(reductionDimension));
-
-  return {executableOp, entryPointOp};
-}
-
-// Outlines a reduction region into one or more executables.
-// This separates the reduction into multiple dispatches, one for each reduction
-// dimension (thankfully XLA's operation semantics state this is ok). We then
-// special case the first dispatch such that it takes the constant initial
-// values so that we don't have to materialize a buffer for them.
-LogicalResult outlineReductionRegion(
-    ReductionRegionOp regionOp, int outlinedRegionOrdinal,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  // Insert at the same place as the original region.
-  OpBuilder dispatcherBuilder(regionOp);
-
-  SmallVector<Value, 4> temps{regionOp.operands()};
-
-  // Create one dispatch per dimension being reduced.
-  // We'll do this by chaining the original input through with the temporary
-  // reduction results. The results we end up with will be the originally
-  // requested shape and we can just substitute them.
-  auto dimensions = regionOp.dimensions().getValue();
-  SmallVector<int32_t, 4> sortedDimensions;
-  for (uint32_t i = 0; i < dimensions.getNumElements(); ++i) {
-    sortedDimensions.push_back(dimensions.getValue<IntegerAttr>({i}).getInt());
-  }
-  llvm::sort(sortedDimensions,
-             [](int32_t a, int32_t b) { return (a - b) > 0; });
-  for (auto dimension : llvm::enumerate(sortedDimensions)) {
-    // Create the executable with the region cloned into it.
-    ExecutableOp executableOp;
-    ReductionEntryOp entryPointOp;
-    std::tie(executableOp, entryPointOp) = createReductionExecutable(
-        regionOp, outlinedRegionOrdinal, dimension.index(), dimension.value(),
-        regionOp.initial_values(), temps, dispatchableFuncOps);
-
-    // Finally convert the dispatch region into a dispatch to the outlined func.
-    temps = convertToDispatchOp(regionOp, executableOp, entryPointOp.getName(),
-                                dimension.value(), regionOp.initial_values(),
-                                std::move(temps), dispatcherBuilder);
-    if (temps.empty()) {
-      return regionOp.emitOpError()
-             << "failed to construct reduction for dimension "
-             << dimension.value();
-    }
-  }
-
-  // Replace uses of the existing results with the new results.
-  for (int i = 0; i < regionOp.getNumResults(); ++i) {
-    regionOp.getResult(i).replaceAllUsesWith(temps[i]);
-  }
-
-  // Erase original region.
-  regionOp.erase();
-
-  return success();
-}
-
-// Creates an executable that holds the given elemental reduction region.
-// The executable will have an entry point taking the specified reduction values
-// and writing the results to output arguments.
-std::pair<ExecutableOp, WindowedReductionEntryOp>
-createWindowedReductionExecutable(
-    WindowedReductionRegionOp regionOp, int outlinedRegionOrdinal,
-    int separatedReductionIndex, int32_t windowDimension, int32_t windowStride,
-    int32_t baseDilation, int32_t windowDilation,
-    SmallVector<Value, 4> initialValues, SmallVector<Value, 4> inputs,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  // Create the dispatch and invocation functions.
-  auto parentFuncOp = regionOp.getParentOfType<FuncOp>();
-  std::string namePrefix = parentFuncOp.getName().str() + "_reduce_" +
-                           std::to_string(outlinedRegionOrdinal) + "_dim_" +
-                           std::to_string(separatedReductionIndex);
-  std::string dispatchFuncName = namePrefix + "_dispatch";
-  auto dispatchFuncOp = createRegionFunction(
-      regionOp.getLoc(), dispatchFuncName, regionOp.dispatch());
-  std::string invocationFuncName = namePrefix + "_invocation";
-  auto invocationFuncOp = createRegionFunction(
-      regionOp.getLoc(), invocationFuncName, regionOp.invocation());
-
-  // Create the executable with the region cloned into it.
-  auto executableOp = createExecutable(
-      regionOp.getLoc(), namePrefix, {dispatchFuncOp, invocationFuncOp},
-      parentFuncOp.getParentOfType<ModuleOp>(), dispatchableFuncOps);
-  executableOp.getOperation()->moveBefore(parentFuncOp);
-
-  // Add dispatch export pointing at the function.
-  OpBuilder builder(executableOp.body());
-  auto entryPointOp = builder.create<WindowedReductionEntryOp>(
-      regionOp.getLoc(), builder.getStringAttr(dispatchFuncOp.getName()),
-      builder.getSymbolRefAttr(dispatchFuncOp),
-      builder.getSymbolRefAttr(invocationFuncOp),
-      builder.getI32IntegerAttr(windowDimension),
-      builder.getI32IntegerAttr(windowStride),
-      builder.getI32IntegerAttr(baseDilation),
-      builder.getI32IntegerAttr(windowDilation),
-      builder.getI32IntegerAttr(
-          static_cast<uint32_t>(regionOp.padding_mode())));
-
-  return {executableOp, entryPointOp};
-}
-
-// Outlines a windowed reduction region into one or more executables.
-// This separates the reduction into multiple dispatches, one for each reduction
-// dimension (thankfully XLA's operation semantics state this is ok). We then
-// special case the first dispatch such that it takes the constant initial
-// values so that we don't have to materialize a buffer for them.
-LogicalResult outlineWindowedReductionRegion(
-    WindowedReductionRegionOp regionOp, int outlinedRegionOrdinal,
-    llvm::StringMap<FuncOp> &dispatchableFuncOps) {
-  // Insert at the same place as the original region.
-  OpBuilder dispatcherBuilder(regionOp);
-
-  SmallVector<Value, 4> initialValues{regionOp.initial_values()};
-  SmallVector<Value, 4> temps{regionOp.operands()};
-
-  // Create one dispatch per dimension being reduced.
-  // We'll do this by chaining the original input through with the temporary
-  // reduction results. The results we end up with will be the originally
-  // requested shape and we can just substitute them.
-  using WindowTuple = std::tuple<int32_t, int32_t, int32_t, int32_t>;
-
-  auto windowDimensions = regionOp.window_dimensions();
-  auto windowStrides = regionOp.window_strides();
-  auto baseDilations = regionOp.base_dilations();
-  auto windowDilations = regionOp.window_dilations();
-  SmallVector<WindowTuple, 4> sortedWindowAttrs;
-  for (uint32_t i = 0; i < windowDimensions.getNumElements(); ++i) {
-    int32_t windowDimension =
-        windowDimensions.getValue<IntegerAttr>({i}).getInt();
-    int32_t windowStride = windowStrides.getValue<IntegerAttr>({i}).getInt();
-    int32_t baseDilation = baseDilations.getValue<IntegerAttr>({i}).getInt();
-    int32_t windowDilation =
-        windowDilations.getValue<IntegerAttr>({i}).getInt();
-    sortedWindowAttrs.push_back(WindowTuple(windowDimension, windowStride,
-                                            baseDilation, windowDilation));
-  }
-  llvm::sort(sortedWindowAttrs, [](WindowTuple a, WindowTuple b) {
-    return std::get<0>(a) - std::get<0>(b);
-  });
-  for (auto windowAttrs : llvm::enumerate(sortedWindowAttrs)) {
-    int32_t windowDimension = std::get<0>(windowAttrs.value());
-    int32_t windowStride = std::get<1>(windowAttrs.value());
-    int32_t baseDilation = std::get<2>(windowAttrs.value());
-    int32_t windowDilation = std::get<3>(windowAttrs.value());
-    ExecutableOp executableOp;
-    WindowedReductionEntryOp entryPointOp;
-    std::tie(executableOp, entryPointOp) = createWindowedReductionExecutable(
-        regionOp, outlinedRegionOrdinal, windowAttrs.index(), windowDimension,
-        windowStride, baseDilation, windowDilation, initialValues, temps,
-        dispatchableFuncOps);
-    temps = convertToDispatchOp(regionOp, executableOp, entryPointOp.getName(),
-                                windowDimension, initialValues,
-                                std::move(temps), dispatcherBuilder);
-    if (temps.empty()) {
-      return regionOp.emitOpError()
-             << "failed to construct reduction for windowed dimension "
-             << windowDimension;
-    }
-  }
-
-  // Replace uses of the existing results with the new results.
-  for (int i = 0; i < regionOp.getNumResults(); ++i) {
-    regionOp.getResult(i).replaceAllUsesWith(temps[i]);
-  }
-
-  // Erase original region.
-  regionOp.erase();
-
-  return success();
-}
-
-}  // namespace
-
-class OutlineReductionRegionsPass
-    : public ModulePass<OutlineReductionRegionsPass> {
- public:
-  OutlineReductionRegionsPass() = default;
-  explicit OutlineReductionRegionsPass(
-      std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps)
-      : dispatchableFuncOps_(std::move(dispatchableFuncOps)) {}
-
-  void runOnModule() override {
-    // TODO(benvanik): replace with a pattern rewriter?
-    auto funcOps = llvm::to_vector<32>(getModule().getOps<FuncOp>());
-    for (auto funcOp : funcOps) {
-      SmallVector<ReductionRegionOp, 4> reductionRegionOps;
-      funcOp.walk(
-          [&](ReductionRegionOp op) { reductionRegionOps.push_back(op); });
-      for (int i = 0; i < reductionRegionOps.size(); ++i) {
-        if (failed(outlineReductionRegion(reductionRegionOps[i], i,
-                                          *dispatchableFuncOps_))) {
-          return signalPassFailure();
-        }
-      }
-      SmallVector<WindowedReductionRegionOp, 4> windowedReductionRegionOps;
-      funcOp.walk([&](WindowedReductionRegionOp op) {
-        windowedReductionRegionOps.push_back(op);
-      });
-      for (int i = 0; i < windowedReductionRegionOps.size(); ++i) {
-        if (failed(outlineWindowedReductionRegion(windowedReductionRegionOps[i],
-                                                  i, *dispatchableFuncOps_))) {
-          return signalPassFailure();
-        }
-      }
-    }
-  }
-
- private:
-  std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps_;
-};
-
-std::unique_ptr<OpPassBase<ModuleOp>> createOutlineReductionRegionsPass(
-    std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps) {
-  return std::make_unique<OutlineReductionRegionsPass>(
-      std::move(dispatchableFuncOps));  // NOLINT
-}
-
-static PassRegistration<OutlineReductionRegionsPass> pass(
-    "iree-flow-outline-reduction-regions",
-    "Outlines reduction regions into standalone functions");
-
-}  // namespace Flow
-}  // namespace IREE
-}  // namespace iree_compiler
-}  // namespace mlir
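The core shape logic in the file deleted above is small enough to state directly: each per-dimension dispatch drops one reduced dimension, so a multi-dimensional reduction chains through progressively smaller temporaries. A self-contained sketch in plain C++ (not IREE code) mirroring the deleted calculateResultShape(), with shapes taken from the deleted unrolled_reduction test:

#include <cstdint>
#include <vector>

// Drop the reduced dimension from a shape, as calculateResultShape() did.
static std::vector<int64_t> dropDim(const std::vector<int64_t> &shape,
                                    int reducedDim) {
  std::vector<int64_t> result;
  for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
    if (i != reducedDim) result.push_back(shape[i]);
  }
  return result;
}

int main() {
  // Reducing dims {1, 2} of tensor<4x2x8xf32>, innermost first:
  std::vector<int64_t> shape = {4, 2, 8};
  shape = dropDim(shape, 2);  // first dispatch:  tensor<4x2x8xf32> -> tensor<4x2xf32>
  shape = dropDim(shape, 1);  // second dispatch: tensor<4x2xf32>   -> tensor<4xf32>
  return (shape.size() == 1 && shape[0] == 4) ? 0 : 1;
}

Sorting the reduced dimensions in descending order keeps each remaining dimension's index stable as the earlier (larger) dimensions are removed.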
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index 872c572..91a2e69 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -25,11 +25,6 @@
 namespace IREE {
 namespace Flow {
 
-static llvm::cl::opt<bool> experimentalDispatchReduce{
-    "iree-flow-experimental-dispatch-reduce",
-    llvm::cl::desc("Enables reductions within dispatch regions"),
-    llvm::cl::init(false)};
-
 void buildFlowTransformPassPipeline(OpPassManager &passManager) {
   passManager.addPass(createCanonicalizerPass());
 
@@ -53,16 +48,8 @@
   passManager.addNestedPass<FuncOp>(
       IREE::Flow::createPrePartitioningConversionPass());
 
-  if (experimentalDispatchReduce) {
-    // Unroll multi-dimensional reductions to one reduction per dimension.
-    passManager.addNestedPass<FuncOp>(IREE::Flow::createUnrollReductionsPass());
-  } else {
-    // Find reduction ops and create flow.reduction.regions. We do this prior to
-    // performing dispatch region identification so that we can build as big of
-    // fused reduction regions as possible. The remaining ops will be put into
-    // dispatch regions.
-    passManager.addPass(IREE::Flow::createIdentifyReductionRegionsPass());
-  }
+  // Unroll multi-dimensional reductions to one reduction per dimension.
+  passManager.addNestedPass<FuncOp>(IREE::Flow::createUnrollReductionsPass());
   passManager.addNestedPass<FuncOp>(createCSEPass());
 
   // First perform module-level analysis that following passes will use to query
@@ -87,8 +74,6 @@
   // sequencer functions performing dispatches from the dispatchees.
   passManager.addPass(
       IREE::Flow::createOutlineDispatchRegionsPass(dispatchableFuncOps));
-  passManager.addPass(
-      IREE::Flow::createOutlineReductionRegionsPass(dispatchableFuncOps));
 
   // Cleanup identity ops that clutter up the IR and canonicalize.
   passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
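A hedged usage sketch of the pipeline after this change. buildFlowTransformPassPipeline comes from the diff above; the surrounding driver function and the exact MLIR API spellings of this era are assumptions, not part of the change:

#include "iree/compiler/Dialect/Flow/Transforms/Passes.h"
#include "mlir/IR/Module.h"
#include "mlir/Pass/PassManager.h"

// Runs the Flow transform pipeline on a module. Reductions are now always
// unrolled to one reduction per dimension and dispatched like any other op;
// there is no -iree-flow-experimental-dispatch-reduce branch to take.
mlir::LogicalResult runFlowPipeline(mlir::ModuleOp moduleOp) {
  mlir::PassManager passManager(moduleOp.getContext());
  mlir::iree_compiler::IREE::Flow::buildFlowTransformPassPipeline(passManager);
  return passManager.run(moduleOp);
}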
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h
index b83a600..42aaca9 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h
@@ -110,17 +110,6 @@
     std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps);
 
 //===----------------------------------------------------------------------===//
-// Reductions (flow.reduction.region)
-//===----------------------------------------------------------------------===//
-
-// Identifies reduction regions and wraps them in flow.reduction_regions.
-std::unique_ptr<OpPassBase<ModuleOp>> createIdentifyReductionRegionsPass();
-
-// Outlines dispatch regions into executables.
-std::unique_ptr<OpPassBase<ModuleOp>> createOutlineReductionRegionsPass(
-    std::shared_ptr<llvm::StringMap<FuncOp>> dispatchableFuncOps);
-
-//===----------------------------------------------------------------------===//
 // Optimizations
 //===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/assign_executable_workloads.mlir b/iree/compiler/Dialect/Flow/Transforms/test/assign_executable_workloads.mlir
index 48bcd5d..9cd1007 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/assign_executable_workloads.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/assign_executable_workloads.mlir
@@ -18,24 +18,3 @@
   %0 = flow.dispatch @singleStaticWorkload_ex_dispatch_0::@singleStaticWorkload_rgn_dispatch_0[%cst : vector<3xi32>](%arg0) : (tensor<4xf32>) -> tensor<4xf32>
   return %0 : tensor<4xf32>
 }
-
-// -----
-
-flow.executable @reduction_ex_reduce_0_dim_0 {
-  // CHECK-LABEL: flow.reduction.entry @reduction_rgn_reduce_0_dim_0_entry
-  // CHECK-SAME: workload = dense<[4, 1, 1]> : vector<3xi32>
-  flow.reduction.entry @reduction_rgn_reduce_0_dim_0_entry apply(@reduction_rgn_reduce_0_dim_0) attributes {dimension = 1 : i32}
-  module {
-    func @reduction_rgn_reduce_0_dim_0_entry(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-    func @reduction_rgn_reduce_0_dim_0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-      %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-      return %0 : tensor<f32>
-    }
-  }
-}
-func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-  %cst = constant dense<0.000000e+00> : tensor<f32>
-  %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-  %0 = flow.dispatch @reduction_ex_reduce_0_dim_0::@reduction_rgn_reduce_0_dim_0_entry[%cst_0 : vector<3xi32>](%arg0, %cst) : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_reduction_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_reduction_regions.mlir
deleted file mode 100644
index 8dbb335..0000000
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_reduction_regions.mlir
+++ /dev/null
@@ -1,55 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-identify-reduction-regions %s | IreeFileCheck %s
-
-// CHECK-LABEL: @single_reduction
-func @single_reduction(%arg0 : tensor<4x8xf32>) -> tensor<4xf32> {
-  // CHECK-DAG: [[INITIAL:%.+]] = constant dense<0.000000e+00>
-  %0 = constant dense<0.000000e+00> : tensor<f32>
-  // CHECK-DAG: constant dense<[4, 1, 1]>
-  // CHECK-NEXT: [[RESULT:%.+]] = flow.reduction.region
-  // CHECK-SAME: [%cst_0 : vector<3xi32>]
-  // CHECK-SAME: (%arg1 = %arg0 : tensor<4x8xf32>, %arg2 = [[INITIAL]] : tensor<f32>) -> tensor<4xf32>
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg1, %arg2)
-  %1 = "xla_hlo.reduce"(%arg0, %0) ( {
-  // CHECK: invocation((%arg1, %arg2) : tensor<f32>) -> tensor<f32> {
-  ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
-    // CHECK-NEXT: %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    // CHECK-NEXT: flow.return %1 : tensor<f32>
-    "xla_hlo.return"(%2) : (tensor<f32>) -> ()
-  // CHECK-NEXT: } {dimensions = dense<1> : vector<1xi32>}
-  }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-  // CHECK: return [[RESULT]] : tensor<4xf32>
-  return %1 : tensor<4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @multi_reduction
-func @multi_reduction(%arg0 : tensor<4x8xf32>, %arg1 : tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
-  // CHECK-DAG: [[INITIALA:%.+]] = constant dense<0.000000e+00>
-  %0 = constant dense<0.000000e+00> : tensor<f32>
-  // CHECK-DAG: [[INITIALB:%.+]] = constant dense<1.000000e+00>
-  %1 = constant dense<1.000000e+00> : tensor<f32>
-  // CHECK: constant dense<[4, 1, 1]>
-  // CHECK-NEXT: [[RESULT:%.+]]:2 = flow.reduction.region
-  // CHECK-SAME: [%cst_1 : vector<3xi32>]
-  // CHECK-SAME: (%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>, %arg4 = [[INITIALA]] : tensor<f32>, %arg5 = [[INITIALB]] : tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5)
-  %2, %3 = "xla_hlo.reduce"(%arg0, %arg1, %0, %1) ( {
-  // CHECK: invocation((%arg2, %arg3) : tensor<f32>, (%arg4, %arg5) : tensor<f32>) -> (tensor<f32>, tensor<f32>) {
-  ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
-    // CHECK-NEXT: %1 = xla_hlo.add %arg2, %arg4 : tensor<f32>
-    %4 = xla_hlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
-    // CHECK-NEXT: %2 = xla_hlo.add %arg3, %arg5 : tensor<f32>
-    %5 = xla_hlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
-    // CHECK-NEXT: flow.return %1, %2 : tensor<f32>, tensor<f32>
-    "xla_hlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
-  // CHECK-NEXT: } {dimensions = dense<1> : vector<1xi32>}
-  }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK: return [[RESULT]]#0, [[RESULT]]#1 : tensor<4xf32>, tensor<4xf32>
-  return %2, %3 : tensor<4xf32>, tensor<4xf32>
-}
-
-// -----
-
-// TODO(benvanik): windowed reduction.
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/outline_reduction_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/outline_reduction_regions.mlir deleted file mode 100644 index f05e169..0000000 --- a/iree/compiler/Dialect/Flow/Transforms/test/outline_reduction_regions.mlir +++ /dev/null
@@ -1,123 +0,0 @@
-// RUN: iree-opt -split-input-file -iree-flow-outline-reduction-regions -cse %s | IreeFileCheck %s
-func @single_reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-  %cst = constant dense<0.000000e+00> : tensor<f32>
-  %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-  %0 = flow.reduction.region[%cst_0 : vector<3xi32>](%arg1 = %arg0 : tensor<4x8xf32>, %arg2 = %cst : tensor<f32>) -> tensor<4xf32> {
-    %1 = "xla_hlo.reduce"(%arg1, %arg2) ( {
-    ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):  // no predecessors
-      %2 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%2) : (tensor<f32>) -> ()
-    }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-    flow.return %1 : tensor<4xf32>
-  } invocation((%arg1, %arg2) : tensor<f32>) -> tensor<f32> {
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    flow.return %1 : tensor<f32>
-  } {dimensions = dense<1> : vector<1xi32>}
-  return %0 : tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable @single_reduction_reduce_0_dim_0 {
-// CHECK-NEXT: flow.reduction.entry @single_reduction_reduce_0_dim_0_dispatch apply(@single_reduction_reduce_0_dim_0_invocation) attributes {dimension = 1 : i32}
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @single_reduction_reduce_0_dim_0_dispatch(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: func @single_reduction_reduce_0_dim_0_invocation(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-// CHECK-NEXT: return %0 : tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @single_reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-// CHECK-DAG: %cst = constant dense<0.000000e+00> : tensor<f32>
-// CHECK-DAG: %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-// CHECK-NEXT: %0 = flow.dispatch @single_reduction_reduce_0_dim_0::@single_reduction_reduce_0_dim_0_dispatch[%cst_0 : vector<3xi32>](%arg0, %cst) : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: return %0 : tensor<4xf32>
-// CHECK-NEXT: }
-
-// -----
-
-func @unrolled_reduction(%arg0: tensor<4x2x8xf32>) -> tensor<4xf32> {
-  %cst = constant dense<0.000000e+00> : tensor<f32>
-  %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-  %0 = flow.reduction.region[%cst_0 : vector<3xi32>](%arg1 = %arg0 : tensor<4x2x8xf32>, %arg2 = %cst : tensor<f32>) -> tensor<4xf32> {
-    %1 = "xla_hlo.reduce"(%arg1, %arg2) ( {
-    ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):  // no predecessors
-      %2 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%2) : (tensor<f32>) -> ()
-    }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<4x2x8xf32>, tensor<f32>) -> tensor<4xf32>
-    flow.return %1 : tensor<4xf32>
-  } invocation((%arg1, %arg2) : tensor<f32>) -> tensor<f32> {
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    flow.return %1 : tensor<f32>
-  } {dimensions = dense<[1, 2]> : vector<2xi32>}
-  return %0 : tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable @unrolled_reduction_reduce_0_dim_0 {
-// CHECK-NEXT: flow.reduction.entry @unrolled_reduction_reduce_0_dim_0_dispatch apply(@unrolled_reduction_reduce_0_dim_0_invocation) attributes {dimension = 2 : i32}
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @unrolled_reduction_reduce_0_dim_0_dispatch(tensor<4x2x8xf32>, tensor<f32>) -> tensor<4x2xf32>
-// CHECK-NEXT: func @unrolled_reduction_reduce_0_dim_0_invocation(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-// CHECK-NEXT: return %0 : tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: flow.executable @unrolled_reduction_reduce_0_dim_1 {
-// CHECK-NEXT: flow.reduction.entry @unrolled_reduction_reduce_0_dim_1_dispatch apply(@unrolled_reduction_reduce_0_dim_1_invocation) attributes {dimension = 1 : i32}
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @unrolled_reduction_reduce_0_dim_1_dispatch(tensor<4x2xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: func @unrolled_reduction_reduce_0_dim_1_invocation(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-// CHECK-NEXT: return %0 : tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @unrolled_reduction(%arg0: tensor<4x2x8xf32>) -> tensor<4xf32> {
-// CHECK-DAG: %cst = constant dense<0.000000e+00> : tensor<f32>
-// CHECK-DAG: %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-// CHECK-NEXT: %0 = flow.dispatch @unrolled_reduction_reduce_0_dim_0::@unrolled_reduction_reduce_0_dim_0_dispatch[%cst_0 : vector<3xi32>](%arg0, %cst) : (tensor<4x2x8xf32>, tensor<f32>) -> tensor<4x2xf32>
-// CHECK-NEXT: %1 = flow.dispatch @unrolled_reduction_reduce_0_dim_1::@unrolled_reduction_reduce_0_dim_1_dispatch[%cst_0 : vector<3xi32>](%0, %cst) : (tensor<4x2xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-
-// -----
-
-func @multi_reduction(%arg0: tensor<4x8xf32>, %arg1: tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
-  %cst = constant dense<0.000000e+00> : tensor<f32>
-  %cst_0 = constant dense<1.000000e+00> : tensor<f32>
-  %cst_1 = constant dense<[4, 1, 1]> : vector<3xi32>
-  %0:2 = flow.reduction.region[%cst_1 : vector<3xi32>](%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>, %arg4 = %cst : tensor<f32>, %arg5 = %cst_0 : tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>) {
-    %1:2 = "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5) ( {
-    ^bb0(%arg6: tensor<f32>, %arg7: tensor<f32>, %arg8: tensor<f32>, %arg9: tensor<f32>):  // no predecessors
-      %2 = xla_hlo.add %arg6, %arg8 : tensor<f32>
-      %3 = xla_hlo.add %arg7, %arg9 : tensor<f32>
-      "xla_hlo.return"(%2, %3) : (tensor<f32>, tensor<f32>) -> ()
-    }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-    flow.return %1#0, %1#1 : tensor<4xf32>, tensor<4xf32>
-  } invocation((%arg2, %arg3) : tensor<f32>, (%arg4, %arg5) : tensor<f32>) -> (tensor<f32>, tensor<i32>) {
-    %1 = xla_hlo.add %arg2, %arg4 : tensor<f32>
-    %2 = xla_hlo.add %arg3, %arg5 : tensor<f32>
-    flow.return %1, %2 : tensor<f32>, tensor<f32>
-  } {dimensions = dense<1> : vector<1xi32>}
-  return %0#0, %0#1 : tensor<4xf32>, tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable @multi_reduction_reduce_0_dim_0 {
-// CHECK-NEXT: flow.reduction.entry @multi_reduction_reduce_0_dim_0_dispatch apply(@multi_reduction_reduce_0_dim_0_invocation) attributes {dimension = 1 : i32}
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @multi_reduction_reduce_0_dim_0_dispatch(tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-// CHECK-NEXT: func @multi_reduction_reduce_0_dim_0_invocation(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<f32>, %arg3: tensor<f32>) -> (tensor<f32>, tensor<f32>) {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg2 : tensor<f32>
-// CHECK-NEXT: %1 = xla_hlo.add %arg1, %arg3 : tensor<f32>
-// CHECK-NEXT: return %0, %1 : tensor<f32>, tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @multi_reduction(%arg0: tensor<4x8xf32>, %arg1: tensor<4x8xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
-// CHECK-DAG: %cst = constant dense<0.000000e+00> : tensor<f32>
-// CHECK-DAG: %cst_0 = constant dense<1.000000e+00> : tensor<f32>
-// CHECK-DAG: %cst_1 = constant dense<[4, 1, 1]> : vector<3xi32>
-// CHECK-NEXT: %0:2 = flow.dispatch @multi_reduction_reduce_0_dim_0::@multi_reduction_reduce_0_dim_0_dispatch[%cst_1 : vector<3xi32>](%arg0, %arg1, %cst, %cst_0) : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-// CHECK-NEXT: return %0#0, %0#1 : tensor<4xf32>, tensor<4xf32>
-// CHECK-NEXT: }
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
index 5e8eb70..4b1dee9 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
@@ -160,28 +160,28 @@
   return %1 : tensor<4xf32>
 }
 
-// CHECK-LABEL: flow.executable @reduction_reduce_0_dim_0 {
-// CHECK-NEXT: flow.reduction.entry @reduction_reduce_0_dim_0_dispatch apply(@reduction_reduce_0_dim_0_invocation) attributes {
-// CHECK-SAME: dimension = 1 : i32,
-// CHECK-SAME: workload = dense<[4, 1, 1]>
-// CHECK-SAME: }
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @reduction_reduce_0_dim_0_dispatch(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: func @reduction_reduce_0_dim_0_invocation(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-// CHECK-NEXT: return %0 : tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
-// CHECK-NEXT: %cst = constant dense<0.000000e+00> : tensor<f32>
-// CHECK-NEXT: %cst_0 = constant dense<[4, 1, 1]> : vector<3xi32>
-// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg1 = %cst_0 : vector<3xi32>, %arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %cst : tensor<f32>) -> tensor<4xf32> {
-// CHECK-NEXT: %1 = flow.dispatch @reduction_reduce_0_dim_0::@reduction_reduce_0_dim_0_dispatch[%arg1 : vector<3xi32>](%arg2, %arg3) : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-// CHECK-NEXT: flow.return %1 : tensor<4xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: return %0 : tensor<4xf32>
-// CHECK-NEXT: }
+// CHECK-LABEL: flow.executable @reduction_ex_dispatch_0 {
+// CHECK-NEXT: flow.dispatch.entry @reduction_ex_dispatch_0 attributes {workload = dense<[4, 1, 1]> : vector<3xi32>}
+// CHECK-NEXT: module {
+// CHECK-NEXT: func @reduction_ex_dispatch_0(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
+// CHECK-NEXT: %cst = constant dense<0.000000e+00> : tensor<f32>
+// CHECK-NEXT: %0 = "xla_hlo.reduce"(%arg0, %cst) ( {
+// CHECK-NEXT: ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
+// CHECK-NEXT: %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
+// CHECK-NEXT: "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+// CHECK-NEXT: }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
+// CHECK-NEXT: return %0 : tensor<4xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: func @reduction(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
+// CHECK-NEXT: %cst = constant dense<[4, 1, 1]> : vector<3xi32>
+// CHECK-NEXT: %0 = flow.ex.stream.fragment(%arg1 = %cst : vector<3xi32>, %arg2 = %arg0 : tensor<4x8xf32>) -> tensor<4xf32> {
+// CHECK-NEXT: %1 = flow.dispatch @reduction_ex_dispatch_0::@reduction_ex_dispatch_0[%arg1 : vector<3xi32>](%arg2) : (tensor<4x8xf32>) -> tensor<4xf32>
+// CHECK-NEXT: flow.return %1 : tensor<4xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %0 : tensor<4xf32>
+// CHECK-NEXT: }
 
 // -----
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTarget.cpp
index 4ed3f3b..7a2a93f 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTarget.cpp
@@ -49,8 +49,6 @@
   for (auto& op : executableOp.getBlock().getOperations()) {
     if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
       entryPointNames.push_back(std::string(entryOp.function_ref()));
-    } else if (auto entryOp = dyn_cast<IREE::Flow::ReductionEntryOp>(op)) {
-      entryPointNames.push_back(std::string(entryOp.function_ref()));
     }
   }
   return entryPointNames;
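After this hunk, entry-point discovery in the LLVM target walks only flow.dispatch.entry ops. A minimal self-contained sketch of the surviving logic, reconstructed from the context lines above; the helper name and signature are illustrative, not necessarily the actual function in LLVMTarget.cpp:

    // Collect the names of all dispatch entry points in a flow.executable.
    // Hypothetical helper; the loop body mirrors the hunk context above.
    static std::vector<std::string> getEntryPointNames(
        IREE::Flow::ExecutableOp executableOp) {
      std::vector<std::string> entryPointNames;
      for (auto& op : executableOp.getBlock().getOperations()) {
        // flow.reduction.entry is gone; only dispatch entries contribute.
        if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
          entryPointNames.push_back(std::string(entryOp.function_ref()));
        }
      }
      return entryPointNames;
    }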
diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
index e726c9e..9e5af90 100644
--- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -102,8 +102,6 @@
   for (auto &op : executableOp.getBlock().getOperations()) {
     if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(op)) {
       entryPointNames.push_back(std::string(entryOp.function_ref()));
-    } else if (auto entryOp = dyn_cast<IREE::Flow::ReductionEntryOp>(op)) {
-      entryPointNames.push_back(std::string(entryOp.function_ref()));
     }
   }
   return entryPointNames;
@@ -214,14 +212,6 @@
       auto workGroupSize = funcOp.getAttrOfType<DenseIntElementsAttr>(
           "iree.executable.workgroup_size");
       targetEntryOp.setAttr("workgroup_size", workGroupSize);
-    } else if (auto entryOp = dyn_cast<IREE::Flow::ReductionEntryOp>(&op)) {
-      auto targetEntryOp =
-          executableOp.lookupSymbol<IREE::HAL::ExecutableEntryPointOp>(
-              entryOp.sym_name());
-      auto funcOp = moduleOp.lookupSymbol<FuncOp>(entryOp.function_ref());
-      auto workGroupSize = funcOp.getAttrOfType<DenseIntElementsAttr>(
-          "iree.executable.workgroup_size");
-      targetEntryOp.setAttr("workgroup_size", workGroupSize);
     }
   }
 }
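Both hunks above delete a ReductionEntryOp branch that was a line-for-line copy of the DispatchEntryOp branch preceding it. A hedged sketch of the workgroup-size propagation kept by the second hunk, assuming the surviving branch mirrors the deleted one with DispatchEntryOp substituted (executableOp and moduleOp come from the enclosing scope not shown in the diff):

    if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(&op)) {
      // Look up the hal.executable.entry_point created for this entry.
      auto targetEntryOp =
          executableOp.lookupSymbol<IREE::HAL::ExecutableEntryPointOp>(
              entryOp.sym_name());
      // Copy the workgroup size the SPIR-V lowering attached to the source
      // function onto the HAL entry point op.
      auto funcOp = moduleOp.lookupSymbol<FuncOp>(entryOp.function_ref());
      auto workGroupSize = funcOp.getAttrOfType<DenseIntElementsAttr>(
          "iree.executable.workgroup_size");
      targetEntryOp.setAttr("workgroup_size", workGroupSize);
    }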
diff --git a/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir b/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir
index c647802..d9b2169 100644
--- a/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir
+++ b/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -iree-hal-transformation-pipeline -iree-hal-target-backends=vulkan-spirv -iree-flow-experimental-dispatch-reduce %s | IreeFileCheck %s -check-prefix=VKSPV
+// RUN: iree-opt -split-input-file -iree-hal-transformation-pipeline -iree-hal-target-backends=vulkan-spirv %s | IreeFileCheck %s -check-prefix=VKSPV
 
 flow.executable @simpleMath_ex_dispatch_0 {
   flow.dispatch.entry @simpleMath_rgn_dispatch_0 attributes {
diff --git a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
index c3fcb2c..b837b4d 100644
--- a/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
+++ b/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
@@ -51,12 +51,6 @@
           moduleOp.lookupSymbol<FuncOp>(dispatchEntryOp.function_ref());
       entryFuncOps.push_back(funcOp);
       entryLocs.push_back(dispatchEntryOp.getLoc());
-    } else if (auto reductionEntryOp =
-                   dyn_cast<IREE::Flow::ReductionEntryOp>(op)) {
-      auto funcOp =
-          moduleOp.lookupSymbol<FuncOp>(reductionEntryOp.function_ref());
-      entryFuncOps.push_back(funcOp);
-      entryLocs.push_back(reductionEntryOp.getLoc());
     }
   }
   auto interfaceLoc = executableBuilder.getFusedLoc(entryLocs);
@@ -208,26 +202,6 @@
           builder.getI32IntegerAttr(nextOrdinal++), workGroupSizeAttr,
           builder.getSymbolRefAttr(interfaceOp),
           TypeAttr::get(sourceFuncOp.getType()));
-    } else if (auto reductionEntryOp =
-                   dyn_cast<IREE::Flow::ReductionEntryOp>(op)) {
-      auto sourceFuncOp = sourceOp.getInnerModule().lookupSymbol<FuncOp>(
-          reductionEntryOp.function_ref());
-      auto thunkFuncOp = createDispatchEntryThunk(sourceFuncOp, interfaceOp);
-      if (!thunkFuncOp.hasValue()) {
-        return failure();
-      }
-      reductionEntryOp.setAttr(
-          "function_ref", builder.getSymbolRefAttr(thunkFuncOp.getValue()));
-
-      builder.create<IREE::HAL::ExecutableEntryPointOp>(
-          reductionEntryOp.getLoc(),
-          builder.getStringAttr(thunkFuncOp->getName()),
-          builder.getI32IntegerAttr(nextOrdinal++),
-          DenseIntElementsAttr::get(
-              VectorType::get({3}, builder.getIntegerType(32)),
-              ArrayRef<int32_t>{1, 1, 1}),
-          builder.getSymbolRefAttr(interfaceOp),
-          TypeAttr::get(sourceFuncOp.getType()));
     }
   }
   return success();
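The second hunk leaves only the dispatch-entry path for declaring HAL entry points. For reference, a sketch of that declaration assembled from the surviving context lines; dispatchEntryOp, thunkFuncOp, workGroupSizeAttr, interfaceOp, nextOrdinal, and sourceFuncOp are defined earlier in the function and not shown in the diff, so treat this as a sketch rather than verbatim remaining code:

    // Declare the hal.executable.entry_point for a flow.dispatch.entry,
    // pointing at the I/O thunk and carrying the interface and signature.
    builder.create<IREE::HAL::ExecutableEntryPointOp>(
        dispatchEntryOp.getLoc(),
        builder.getStringAttr(thunkFuncOp->getName()),
        builder.getI32IntegerAttr(nextOrdinal++), workGroupSizeAttr,
        builder.getSymbolRefAttr(interfaceOp),
        TypeAttr::get(sourceFuncOp.getType()));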
diff --git a/iree/compiler/Dialect/HAL/Transforms/RewriteLegacyIO.cpp b/iree/compiler/Dialect/HAL/Transforms/RewriteLegacyIO.cpp
index aaf942a..e54525c 100644
--- a/iree/compiler/Dialect/HAL/Transforms/RewriteLegacyIO.cpp
+++ b/iree/compiler/Dialect/HAL/Transforms/RewriteLegacyIO.cpp
@@ -92,46 +92,6 @@
   for (auto *deadOp : deadOps) deadOp->erase();
 }
 
-// TODO(b/150312935): remove this when the SPIR-V and LLVM targets use
-// hal.interface.
-static void makeLegacyExecutableReductionABI(
-    IREE::Flow::ReductionEntryOp reductionEntryOp, FuncOp thunkOp) {
-  auto *context = thunkOp.getContext();
-
-  auto implOp = thunkOp.getParentOfType<ModuleOp>().lookupSymbol<FuncOp>(
-      (thunkOp.getName() + "_impl").str());
-  implOp.setAttr("iree.executable.export", UnitAttr::get(context));
-  implOp.setAttr("iree.executable.reduction", UnitAttr::get(context));
-  implOp.setAttr("iree.executable.reduction.apply",
-                 FlatSymbolRefAttr::get(reductionEntryOp.apply_ref(), context));
-  implOp.setAttr("iree.executable.reduction.dimension",
-                 IntegerAttr::get(IntegerType::get(32, context),
-                                  reductionEntryOp.dimension()));
-
-  // Remove any blocks that may exist within the implementation function as the
-  // backend will be replacing the body with its own implementation.
-  implOp.getBlocks().clear();
-
-  // Destroy the IO op and replace with the original entry.
-  SymbolTable::setSymbolVisibility(implOp, SymbolTable::Visibility::Public);
-  auto originalName = thunkOp.getName();
-  thunkOp.erase();
-  implOp.setName(originalName);
-
-  // Reset function type to memrefs with output args.
-  SmallVector<Type, 4> inputTypes;
-  for (const auto &oldType : implOp.getType().getInputs()) {
-    inputTypes.push_back(
-        convertLegacyTypeToMemRef(legalizeLegacyType(oldType)));
-  }
-  for (const auto &oldType : implOp.getType().getResults()) {
-    inputTypes.push_back(
-        convertLegacyTypeToMemRef(legalizeLegacyType(oldType)));
-  }
-  auto funcType = FunctionType::get(inputTypes, {}, context);
-  implOp.setType(funcType);
-}
-
 class RewriteLegacyIOPass
     : public OperationPass<RewriteLegacyIOPass, IREE::Flow::ExecutableOp> {
  public:
@@ -144,9 +104,6 @@
         if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(&op)) {
           auto thunkOp = moduleOp.lookupSymbol<FuncOp>(entryOp.function_ref());
           makeLegacyExecutableDispatchABI(entryOp, thunkOp);
-        } else if (auto entryOp = dyn_cast<IREE::Flow::ReductionEntryOp>(&op)) {
-          auto thunkOp = moduleOp.lookupSymbol<FuncOp>(entryOp.function_ref());
-          makeLegacyExecutableReductionABI(entryOp, thunkOp);
         }
       }
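With makeLegacyExecutableReductionABI deleted, the pass body in the second hunk reduces to a single dispatch path. A compact sketch of the remaining loop, reconstructed from the hunk context (the enclosing executableOp/moduleOp setup is not shown in the diff):

    for (auto &op : executableOp.getBlock().getOperations()) {
      if (auto entryOp = dyn_cast<IREE::Flow::DispatchEntryOp>(&op)) {
        // Every remaining entry goes through the dispatch ABI rewrite.
        auto thunkOp = moduleOp.lookupSymbol<FuncOp>(entryOp.function_ref());
        makeLegacyExecutableDispatchABI(entryOp, thunkOp);
      }
    }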
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
index aab38bb..8fa24fd 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
@@ -35,46 +35,3 @@
 // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
   }
 }
-
-// -----
-
-// CHECK-LABEL: hal.executable @reduction_ex_reduce_0_dim_0
-// CHECK-DAG: hal.interface @legacy_io {
-// CHECK-NEXT: hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-// CHECK-NEXT: hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-// CHECK-NEXT: }
-// CHECK-DAG: hal.executable.entry_point @reduction_rgn_reduce_0_dim_0_entry attributes {
-// CHECK-SAME: interface = @legacy_io,
-// CHECK-SAME: ordinal = 0 : i32,
-// CHECK-SAME: signature = (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>,
-// CHECK-SAME: workgroup_size = dense<1> : vector<3xi32>
-// CHECK-SAME: }
-// CHECK-DAG: hal.executable.source {
-// CHECK-NEXT: module {
-// CHECK-NEXT: flow.executable @reduction_ex_reduce_0_dim_0
-flow.executable @reduction_ex_reduce_0_dim_0 {
-  flow.reduction.entry @reduction_rgn_reduce_0_dim_0_entry apply(@reduction_rgn_reduce_0_dim_0) attributes {
-    dimension = 1 : i32,
-    workgroup_size = dense<[32, 1, 1]> : vector<3xi32>,
-    workload = dense<[4, 1, 1]> : vector<3xi32>
-  }
-  // CHECK: module {
-  module {
-    // CHECK-NEXT: func @reduction_rgn_reduce_0_dim_0_entry() {
-    // CHECK-NEXT: [[ZERO:%.+]] = constant 0
-    // CHECK-NEXT: [[ARG0:%.+]] = hal.interface.load.tensor @legacy_io::@arg0, offset = [[ZERO]] : tensor<4x8xf32>
-    // CHECK-NEXT: [[ARG1:%.+]] = hal.interface.load.tensor @legacy_io::@arg1, offset = [[ZERO]] : tensor<f32>
-    // CHECK-NEXT: [[RET0:%.+]] = call @reduction_rgn_reduce_0_dim_0_entry_impl([[ARG0]], [[ARG1]]) : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-    // CHECK-NEXT: hal.interface.store.tensor [[RET0]], @legacy_io::@ret0, offset = [[ZERO]] : tensor<4xf32>
-    // CHECK-NEXT: return
-    // CHECK-NEXT: }
-    // CHECK-NEXT: func @reduction_rgn_reduce_0_dim_0_entry_impl
-    func @reduction_rgn_reduce_0_dim_0_entry(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-    func @reduction_rgn_reduce_0_dim_0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-      %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-      return %0 : tensor<f32>
-    }
-    // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
-  }
-}
diff --git a/iree/compiler/Dialect/HAL/Transforms/test/rewrite_legacy_io.mlir b/iree/compiler/Dialect/HAL/Transforms/test/rewrite_legacy_io.mlir
index 699ca95..a8afb0a 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/rewrite_legacy_io.mlir
+++ b/iree/compiler/Dialect/HAL/Transforms/test/rewrite_legacy_io.mlir
@@ -32,40 +32,3 @@
 // CHECK-NEXT: }
 // CHECK-NEXT: }
 // CHECK-NEXT: }
-
-// -----
-
-flow.executable @reduction_ex_reduce_0_dim_0 {
-  flow.reduction.entry @reduction_rgn_reduce_0_dim_0_entry apply(@reduction_rgn_reduce_0_dim_0) attributes {dimension = 1 : i32, workgroup_size = dense<[32, 1, 1]> : vector<3xi32>, workload = dense<[4, 1, 1]> : vector<3xi32>}
-  module {
-    func @reduction_rgn_reduce_0_dim_0_entry() {
-      %c0_i32 = constant 0 : i32
-      %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0_i32 : tensor<4x8xf32>
-      %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0_i32 : tensor<f32>
-      %2 = call @reduction_rgn_reduce_0_dim_0_entry_impl(%0, %1) : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
-      hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0_i32 : tensor<4xf32>
-      return
-    }
-    func @reduction_rgn_reduce_0_dim_0_entry_impl(tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32> attributes {sym_visibility = "private"}
-    func @reduction_rgn_reduce_0_dim_0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-      %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-      return %0 : tensor<f32>
-    }
-    hal.interface @legacy_io attributes {sym_visibility = "private"} {
-      hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-      hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-      hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-    }
-  }
-}
-
-// CHECK-LABEL: flow.executable @reduction_ex_reduce_0_dim_0 {
-// CHECK-NEXT: flow.reduction.entry @reduction_rgn_reduce_0_dim_0_entry apply(@reduction_rgn_reduce_0_dim_0) attributes {dimension = 1 : i32, workgroup_size = dense<[32, 1, 1]> : vector<3xi32>, workload = dense<[4, 1, 1]> : vector<3xi32>}
-// CHECK-NEXT: module {
-// CHECK-NEXT: func @reduction_rgn_reduce_0_dim_0_entry(memref<4x8xf32>, memref<f32>, memref<4xf32>) attributes {iree.executable.export, iree.executable.reduction, iree.executable.reduction.apply = @reduction_rgn_reduce_0_dim_0, iree.executable.reduction.dimension = 1 : i32}
-// CHECK-NEXT: func @reduction_rgn_reduce_0_dim_0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-// CHECK-NEXT: %0 = xla_hlo.add %arg0, %arg1 : tensor<f32>
-// CHECK-NEXT: return %0 : tensor<f32>
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: }
diff --git a/iree/hal/cts/BUILD b/iree/hal/cts/BUILD
index b5dc016..0b6ce91 100644
--- a/iree/hal/cts/BUILD
+++ b/iree/hal/cts/BUILD
@@ -35,6 +35,7 @@
         "//iree/hal:driver_registry",
 
         # HAL driver modules.
+        "//iree/hal/vmla:vmla_driver_module",  # build-cleaner: keep
         "//iree/hal/vulkan:vulkan_driver_module",  # build-cleaner: keep
         # "//iree/hal/dawn:dawn_driver_module",  # build-cleaner: keep
     ] + PLATFORM_VULKAN_TEST_DEPS,
diff --git a/test/e2e/linalg_path/reduce.mlir b/test/e2e/linalg_path/reduce.mlir
index 76e743b..21b9a80 100644
--- a/test/e2e/linalg_path/reduce.mlir
+++ b/test/e2e/linalg_path/reduce.mlir
@@ -1,4 +1,4 @@
-// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -split-input-file -iree-hal-target-backends=vulkan-spirv -iree-flow-experimental-dispatch-reduce %s | IreeFileCheck %s)
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -split-input-file -iree-hal-target-backends=vulkan-spirv %s | IreeFileCheck %s)
 
 // CHECK-LABEL: EXEC @reduce_dim_1
 // CHECK: 2xi32=25 50
diff --git a/test/e2e/xla/mnist.mlir b/test/e2e/xla/mnist.mlir
index baa5a0a..caf8228 100644
--- a/test/e2e/xla/mnist.mlir
+++ b/test/e2e/xla/mnist.mlir
@@ -1,7 +1,7 @@
 // MNIST model with placeholder weights, for translation testing.
 
-// RUN: iree-run-mlir -iree-hal-target-backends=vmla -iree-flow-experimental-dispatch-reduce %s -input-value="1x28x28x1xf32" | IreeFileCheck %s
-// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -iree-hal-target-backends=vulkan-spirv -iree-flow-experimental-dispatch-reduce %s -input-value="1x28x28x1xf32" | IreeFileCheck %s)
+// RUN: iree-run-mlir -iree-hal-target-backends=vmla %s -input-value="1x28x28x1xf32" | IreeFileCheck %s
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -iree-hal-target-backends=vulkan-spirv %s -input-value="1x28x28x1xf32" | IreeFileCheck %s)
 
 module {
   // CHECK-LABEL: EXEC @main
diff --git a/test/e2e/xla/reduce_float.mlir b/test/e2e/xla/reduce_float.mlir
index 270e882..db5813e 100644
--- a/test/e2e/xla/reduce_float.mlir
+++ b/test/e2e/xla/reduce_float.mlir
@@ -1,5 +1,5 @@
-// RUN: iree-run-mlir -iree-hal-target-backends=vmla -iree-flow-experimental-dispatch-reduce %s | IreeFileCheck %s
-// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir %s -iree-hal-target-backends=vulkan-spirv -iree-flow-experimental-dispatch-reduce )
+// RUN: iree-run-mlir -iree-hal-target-backends=vmla %s | IreeFileCheck %s
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir %s -iree-hal-target-backends=vulkan-spirv )
 
 // Float sum values from [1.0, 10.0]
 // CHECK-LABEL: EXEC @reduce_sum_1x10xf32
diff --git a/test/e2e/xla/reduce_int.mlir b/test/e2e/xla/reduce_int.mlir
index 03141dc..37bef13 100644
--- a/test/e2e/xla/reduce_int.mlir
+++ b/test/e2e/xla/reduce_int.mlir
@@ -1,5 +1,5 @@
-// RUN: iree-run-mlir -iree-hal-target-backends=vmla -iree-flow-experimental-dispatch-reduce %s | IreeFileCheck %s
-// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -iree-hal-target-backends=vulkan-spirv -iree-flow-experimental-dispatch-reduce %s)
+// RUN: iree-run-mlir -iree-hal-target-backends=vmla %s | IreeFileCheck %s
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir -iree-hal-target-backends=vulkan-spirv %s)
 
 // Int sum values from [1, 10]
 // CHECK-LABEL: EXEC @reduce_sum_1x10xi32
diff --git a/test/e2e/xla/unidirectional_lstm.mlir b/test/e2e/xla/unidirectional_lstm.mlir
index b74feee..b373880 100644
--- a/test/e2e/xla/unidirectional_lstm.mlir
+++ b/test/e2e/xla/unidirectional_lstm.mlir
@@ -1,7 +1,7 @@
 // An example LSTM exported from a python reference model with dummy weights.
 
-// RUN: iree-run-mlir %s -iree-hal-target-backends=vmla -iree-flow-experimental-dispatch-reduce -input-value="1x5xf32=[0 1 0 3 4]" -input-value="1x5x2x2xf32=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]" -export-all=false | IreeFileCheck %s --implicit-check-not="[" --implicit-check-not="]"
-// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir %s -iree-hal-target-backends=vulkan-spirv -input-value="1x5xf32=[0 1 0 3 4]" -input-value="1x5x2x2xf32=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]" -iree-flow-experimental-dispatch-reduce -export-all=false | IreeFileCheck %s --implicit-check-not="[" --implicit-check-not="]" )
+// RUN: iree-run-mlir %s -iree-hal-target-backends=vmla -input-value="1x5xf32=[0 1 0 3 4]" -input-value="1x5x2x2xf32=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]" -export-all=false | IreeFileCheck %s --implicit-check-not="[" --implicit-check-not="]"
+// RUN: [[ $IREE_VULKAN_DISABLE == 1 ]] || (iree-run-mlir %s -iree-hal-target-backends=vulkan-spirv -input-value="1x5xf32=[0 1 0 3 4]" -input-value="1x5x2x2xf32=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]" -export-all=false | IreeFileCheck %s --implicit-check-not="[" --implicit-check-not="]" )
 
 // Exported via the XLA HLO Importer
 // The resulting MLIR was modified by hand by changing all large constants to be