Merge main -> google * 8b3790d5b [spirv] Remove code for deprecated Linalg on buffers path (#5626) * 5e66689d3 Guessing at making MacOS happy. (#5654) * 9475b3aeb Merge google -> main (#5646) * e221e5e9e [Talks] 2020-08-20: IREE CodeGen; MLIR ODM (#5585) * 1696d8c42 Only add operand-fusion to MobileNetV2. (#5639) * 9eaa01bbd Integrate MLIR-EmitC at iml130/mlir-emitc@679d7183 (#5633) * 7d0ab9ad3 [spirv] Delete experimental matmul vectorization code (#5638) PiperOrigin-RevId: 370960412
diff --git a/README.md b/README.md index 71189a6..4d5a462 100644 --- a/README.md +++ b/README.md
@@ -58,9 +58,13 @@ We also have some public talks that explain IREE's concepts and architecture: +* 2020-08-20: IREE CodeGen: MLIR Open Design Meeting Presentation + ([recording](https://drive.google.com/file/d/1325zKXnNIXGw3cdWrDWJ1-bp952wvC6W/view?usp=sharing) + and + [slides](https://docs.google.com/presentation/d/1NetHjKAOYg49KixY5tELqFp6Zr2v8_ujGzWZ_3xvqC8/edit)) * 2020-03-18: Interactive HAL IR Walkthrough (Ben Vanik and core team) ([recording](https://drive.google.com/file/d/1_sWDgAPDfrGQZdxAapSA90AD1jVfhp-f/view?usp=sharing)) -* 2020-01-31: End-to-end MLIR Workflow in IREE +* 2020-01-31: End-to-end MLIR Workflow in IREE: MLIR Open Design Meeting Presentation ([recording](https://drive.google.com/open?id=1os9FaPodPI59uj7JJI3aXnTzkuttuVkR) and [slides](https://drive.google.com/open?id=1RCQ4ZPQFK9cVgu3IH1e5xbrBcqy7d_cEZ578j84OvYI))
diff --git a/SUBMODULE_VERSIONS.txt b/SUBMODULE_VERSIONS.txt index bdb740e..abb0e2d 100644 --- a/SUBMODULE_VERSIONS.txt +++ b/SUBMODULE_VERSIONS.txt
@@ -6,7 +6,7 @@ 88b845dee001723c4a0db1fe5477de735b6d3bb0 third_party/liburing 3d4a47eed849f540090e9699e1b4860977558c76 third_party/llvm-bazel bf9eef92b6cd71d262ac12ce6c4919271bd6c910 third_party/llvm-project -3c265bf59bf2515a63ec35571c66954349749a62 third_party/mlir-emitc +679d7183b657a24f48d16de1fcefb20d7cd1f6a2 third_party/mlir-emitc b2a23bf269d52976ff384a60a12826b541f1ebbe third_party/mlir-hlo 2b2bd45bbf9be04fd22ece5cc1f54679202e9257 third_party/pffft d8c7ee00a687ac369e62e2032514a93a9b413502 third_party/pybind11
diff --git a/build_tools/mako/configuration.py b/build_tools/mako/configuration.py index 6ad1d6f..256848d 100644 --- a/build_tools/mako/configuration.py +++ b/build_tools/mako/configuration.py
@@ -94,11 +94,15 @@ self.phones = phones -def get_pixel4_default_target_list(skipped_target=None, batch_config=None): +def get_pixel4_default_target_list(skipped_target=None, + batch_config=None, + compilation_flags=None): if skipped_target is None: skipped_target = [] if batch_config is None: batch_config = [] + if compilation_flags is None: + compilation_flags = [] targets = [ TargetInfo(driver="vmla", hal_target_backend="vmla", @@ -111,7 +115,6 @@ compilation_flags=[ "--iree-llvm-target-triple=aarch64-none-linux-android29", "--iree-flow-inline-constants-max-byte-length=2048", - "--iree-flow-dispatch-formation-enable-operand-fusion" ]), TargetInfo(driver="dylib", hal_target_backend="dylib-llvm-aot", @@ -120,7 +123,6 @@ compilation_flags=[ "--iree-llvm-target-triple=aarch64-none-linux-android29", "--iree-flow-inline-constants-max-byte-length=2048", - "--iree-flow-dispatch-formation-enable-operand-fusion" ], runtime_flags=[ "--dylib_worker_count=3", @@ -140,14 +142,20 @@ for target in targets: if target.mako_tag in batch_config: target.add_batch_flag(batch_config[target.mako_tag]) + if target.mako_tag in compilation_flags: + target.compilation_flags += compilation_flags[target.mako_tag] return targets -def get_s20_default_target_list(skipped_target=None, batch_config=None): +def get_s20_default_target_list(skipped_target=None, + batch_config=None, + compilation_flags=None): if skipped_target is None: skipped_target = [] if batch_config is None: batch_config = [] + if compilation_flags is None: + compilation_flags = [] targets = [ TargetInfo(driver="vmla", hal_target_backend="vmla", @@ -160,7 +168,6 @@ compilation_flags=[ "--iree-llvm-target-triple=aarch64-none-linux-android29", "--iree-flow-inline-constants-max-byte-length=2048", - "--iree-flow-dispatch-formation-enable-operand-fusion" ]), TargetInfo(driver="dylib", hal_target_backend="dylib-llvm-aot", @@ -169,7 +176,6 @@ compilation_flags=[ "--iree-llvm-target-triple=aarch64-none-linux-android29", 
"--iree-flow-inline-constants-max-byte-length=2048", - "--iree-flow-dispatch-formation-enable-operand-fusion" ], runtime_flags=[ "--dylib_worker_count=3", @@ -190,6 +196,8 @@ for target in targets: if target.mako_tag in batch_config: target.add_batch_flag(batch_config[target.mako_tag]) + if target.mako_tag in compilation_flags: + target.compilation_flags += compilation_flags[target.mako_tag] return targets @@ -221,13 +229,31 @@ model_path="mobilenet-v2/iree_input.mlir", flagfile_path="mobilenet-v2/flagfile", phones=[ - PhoneBenchmarkInfo(name="Pixel4", - benchmark_key="6338759231537152", - targets=get_pixel4_default_target_list( - skipped_target=["vlk2"])), - PhoneBenchmarkInfo(name="S20", - benchmark_key="5618403088793600", - targets=get_s20_default_target_list()), + PhoneBenchmarkInfo( + name="Pixel4", + benchmark_key="6338759231537152", + targets=get_pixel4_default_target_list( + skipped_target=["vlk2"], + compilation_flags={ + 'cpu': [ + "--iree-flow-dispatch-formation-enable-operand-fusion" + ], + 'cpu3t': [ + "--iree-flow-dispatch-formation-enable-operand-fusion" + ] + })), + PhoneBenchmarkInfo( + name="S20", + benchmark_key="5618403088793600", + targets=get_s20_default_target_list( + compilation_flags={ + 'cpu': [ + "--iree-flow-dispatch-formation-enable-operand-fusion" + ], + 'cpu3t': [ + "--iree-flow-dispatch-formation-enable-operand-fusion" + ] + })), ]), ModelBenchmarkInfo( name="mobilebert-f16",
diff --git a/iree/compiler/Conversion/Common/Attributes.h b/iree/compiler/Conversion/Common/Attributes.h deleted file mode 100644 index 9659f3e..0000000 --- a/iree/compiler/Conversion/Common/Attributes.h +++ /dev/null
@@ -1,31 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef IREE_COMPILER_CONVERSION_LINALGTOSPIRV_ATTRIBUTES_H_ -#define IREE_COMPILER_CONVERSION_LINALGTOSPIRV_ATTRIBUTES_H_ - -#include "llvm/ADT/StringRef.h" - -namespace mlir { -namespace iree_compiler { - -/// Attribute on a module op to denote the scheduling order of entry points. -/// The attribute value is expected to be an array of entry point name strings. -inline llvm::StringRef getEntryPointScheduleAttrName() { - return "hal.entry_point_schedule"; -} -} // namespace iree_compiler -} // namespace mlir - -#endif // IREE_COMPILER_CONVERSION_LINALGTOSPIRV_ATTRIBUTES_H_
diff --git a/iree/compiler/Conversion/Common/BUILD b/iree/compiler/Conversion/Common/BUILD index ce369b4..720c86a 100644 --- a/iree/compiler/Conversion/Common/BUILD +++ b/iree/compiler/Conversion/Common/BUILD
@@ -32,7 +32,6 @@ "VectorTransferOptimization.cpp", ], hdrs = [ - "Attributes.h", "LaunchConfig.h", "Passes.h", "Transforms.h",
diff --git a/iree/compiler/Conversion/Common/CMakeLists.txt b/iree/compiler/Conversion/Common/CMakeLists.txt index 4a29284..c77ce0c 100644 --- a/iree/compiler/Conversion/Common/CMakeLists.txt +++ b/iree/compiler/Conversion/Common/CMakeLists.txt
@@ -14,7 +14,6 @@ NAME Common HDRS - "Attributes.h" "LaunchConfig.h" "Passes.h" "Transforms.h"
diff --git a/iree/compiler/Conversion/Common/LaunchConfig.cpp b/iree/compiler/Conversion/Common/LaunchConfig.cpp index 2462574..c17c51f 100644 --- a/iree/compiler/Conversion/Common/LaunchConfig.cpp +++ b/iree/compiler/Conversion/Common/LaunchConfig.cpp
@@ -24,7 +24,6 @@ #include "iree/compiler/Conversion/Common/LaunchConfig.h" #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
diff --git a/iree/compiler/Conversion/Common/Transforms.cpp b/iree/compiler/Conversion/Common/Transforms.cpp index 443eef0..a9dc928 100644 --- a/iree/compiler/Conversion/Common/Transforms.cpp +++ b/iree/compiler/Conversion/Common/Transforms.cpp
@@ -23,7 +23,6 @@ #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" #include "iree/compiler/Conversion/CodegenUtils/TransformUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/GPU/GPUDialect.h"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndDistributePass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndDistributePass.cpp index 44e96fd..c43ef16 100644 --- a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndDistributePass.cpp +++ b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndDistributePass.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/Common/Transforms.h" #include "iree/compiler/Conversion/LinalgToLLVM/KernelDispatch.h" #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/MaterializeCPULaunchConfigurationPass.cpp b/iree/compiler/Conversion/LinalgToLLVM/MaterializeCPULaunchConfigurationPass.cpp index bd45763..0071519 100644 --- a/iree/compiler/Conversion/LinalgToLLVM/MaterializeCPULaunchConfigurationPass.cpp +++ b/iree/compiler/Conversion/LinalgToLLVM/MaterializeCPULaunchConfigurationPass.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/Common/Transforms.h" #include "iree/compiler/Conversion/LinalgToLLVM/KernelDispatch.h" #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp index 6b2ff1b..4deba72 100644 --- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp +++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Conversion/Common/Passes.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/HLOToHLO/Passes.h" #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h" #include "iree/compiler/Dialect/Shape/Transforms/Passes.h"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/BUILD b/iree/compiler/Conversion/LinalgToSPIRV/BUILD index 7cbc3cc..5434383 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/BUILD +++ b/iree/compiler/Conversion/LinalgToSPIRV/BUILD
@@ -39,10 +39,7 @@ "ConvertToSPIRVPass.cpp", "FoldGPUProcessorIDUses.cpp", "KernelDispatchUtils.cpp", - "LinalgTileAndDistributePass.cpp", - "MatMulVectorizationTest.cpp", "Passes.cpp", - "SplitDispatchFunctionPass.cpp", "TileAndVectorizeInOneWorkgroupPass.cpp", "Utils.cpp", "VectorToCooperativeMatrixPass.cpp",
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/CMakeLists.txt b/iree/compiler/Conversion/LinalgToSPIRV/CMakeLists.txt index ea663f5..dded958 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/CMakeLists.txt +++ b/iree/compiler/Conversion/LinalgToSPIRV/CMakeLists.txt
@@ -36,10 +36,7 @@ "ConvertToSPIRVPass.cpp" "FoldGPUProcessorIDUses.cpp" "KernelDispatchUtils.cpp" - "LinalgTileAndDistributePass.cpp" - "MatMulVectorizationTest.cpp" "Passes.cpp" - "SplitDispatchFunctionPass.cpp" "TileAndVectorizeInOneWorkgroupPass.cpp" "Utils.cpp" "VectorToCooperativeMatrixPass.cpp"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.cpp b/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.cpp index 1e2be3b..b901721 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.cpp
@@ -20,12 +20,6 @@ namespace iree_compiler { SPIRVCodegenOptions getSPIRVCodegenOptionsFromClOptions() { - static llvm::cl::opt<bool> clEnableVectorization( - "iree-spirv-enable-vectorization", - llvm::cl::desc( - "Enable vectorization transformations in SPIR-V code generation"), - llvm::cl::init(false)); - static llvm::cl::list<unsigned> clWorkgroupTileSizes( "iree-spirv-workgroup-tile-size", llvm::cl::desc("Set tile sizes to use for each workgroup when tiling " @@ -48,11 +42,6 @@ llvm::cl::desc("Set workgroup size to use for SPIR-V code generation"), llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated); - static llvm::cl::opt<bool> clEnableLinalgOnTensorsSPIRV( - "iree-codegen-spirv-experimental-linalg-on-tensors", - llvm::cl::desc("Enable the linalg on tensors on SPIR-V path"), - llvm::cl::init(true)); - SPIRVCodegenOptions options; options.workgroupSize.assign(clWorkgroupSizes.begin(), clWorkgroupSizes.end()); @@ -60,10 +49,7 @@ clWorkgroupTileSizes.end()); options.invocationTileSizes.assign(clInvocationTileSizes.begin(), clInvocationTileSizes.end()); - options.enableVectorization = - clEnableLinalgOnTensorsSPIRV || clEnableVectorization; options.useWorkgroupMemory = clUseWorkgroupMemory; - options.usingLinalgOnTensors = clEnableLinalgOnTensorsSPIRV; return options; }
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h b/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h index c19220f..40df44c 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h +++ b/iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h
@@ -37,9 +37,7 @@ llvm::SmallVector<unsigned, 3> workgroupTileSizes = {}; llvm::SmallVector<unsigned, 3> invocationTileSizes = {}; - bool enableVectorization = false; bool useWorkgroupMemory = false; - bool usingLinalgOnTensors = true; }; // Returns SPIR-V CodeGen options from command-line options.
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/ConcretizeTileAmongWorkgroupsPass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/ConcretizeTileAmongWorkgroupsPass.cpp index ec5658e..2c5fbed 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/ConcretizeTileAmongWorkgroupsPass.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/ConcretizeTileAmongWorkgroupsPass.cpp
@@ -157,20 +157,6 @@ linalgOps.assign(ops.begin(), ops.end()); linalg::LinalgDependenceGraph dependenceGraph(aliases, linalgOps); - // NOTE: Launch configuration expects the original input/output type to decide - // the configuration. But we have already tiled the Linalg ops here. Use an - // attribute to send it over for now. - const char inputTypeAttrName[] = "iree.codegen.original_input_types"; - const char outputTypeAttrName[] = "iree.codegen.original_output_types"; - if (!inputTypes.empty()) { - rootOp->setAttr(inputTypeAttrName, - Builder(rootOp).getTypeArrayAttr(inputTypes)); - } - if (!outputTypes.empty()) { - rootOp->setAttr(outputTypeAttrName, - Builder(rootOp).getTypeArrayAttr(outputTypes)); - } - Optional<LaunchConfig> launchConfig = initGPULaunchConfig( rootOp->getContext(), dependenceGraph, options, linalgOps); if (!launchConfig) {
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp index 4726e36..ef56ea1 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/ConvertToGPUPass.cpp
@@ -23,7 +23,6 @@ #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/Common/Transforms.h" #include "iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.h" #include "iree/compiler/Conversion/LinalgToSPIRV/MemorySpace.h" @@ -439,16 +438,10 @@ } /// Distributes scf.parallel to workitems using local invocation ID. -static LogicalResult mapToLocalInvocationId( - ConversionPatternRewriter &rewriter, scf::ParallelOp pLoopOp, - bool useCyclicDistribution = false) { - if (useCyclicDistribution) { - return distributeCyclicallyToProcessors<gpu::ThreadIdOp, gpu::BlockDimOp>( - rewriter, pLoopOp); - } - return distributeSingleIterationPerProcessor<gpu::ThreadIdOp, - gpu::BlockDimOp>(rewriter, - pLoopOp); +static LogicalResult mapToLocalInvocationId(ConversionPatternRewriter &rewriter, + scf::ParallelOp pLoopOp) { + return distributeCyclicallyToProcessors<gpu::ThreadIdOp, gpu::BlockDimOp>( + rewriter, pLoopOp); } /// Distributes scf.parallel to workitems using global invocation ID. The GPU @@ -499,22 +492,14 @@ namespace { /// Pass to convert from tiled and fused linalg ops into gpu.func. 
-class ConvertToGPUPass +struct ConvertToGPUPass : public PassWrapper<ConvertToGPUPass, OperationPass<IREE::HAL::ExecutableTargetOp>> { - public: - ConvertToGPUPass(const SPIRVCodegenOptions &passOptions) - : options(passOptions) {} - ConvertToGPUPass(const ConvertToGPUPass &pass) : options(pass.options) {} - void getDependentDialects(DialectRegistry &registry) const override { registry.insert<AffineDialect, gpu::GPUDialect, scf::SCFDialect, ShapeDialect>(); } void runOnOperation() override; - - private: - SPIRVCodegenOptions options; }; struct SerializeParallelLoopPattern @@ -532,7 +517,7 @@ template <typename LinalgOpTy> static LogicalResult mapLinalgOpToLocalInvocationIdImpl( LinalgOpTy linalgOp, ArrayRef<Value> operands, - ConversionPatternRewriter &rewriter, bool optimizeControlFlow) { + ConversionPatternRewriter &rewriter) { // Check for marker that specifies that the linalg op is to be partitioned // across threads within a workgroup. if (!hasMarker(linalgOp)) return failure(); @@ -542,7 +527,7 @@ if (loops.getValue().empty()) return success(); auto pLoopOp = cast<scf::ParallelOp>(loops.getValue()[0]); - return mapToLocalInvocationId(rewriter, pLoopOp, optimizeControlFlow); + return mapToLocalInvocationId(rewriter, pLoopOp); } static LogicalResult distributeCopyOp(linalg::CopyOp copyOp, @@ -580,7 +565,7 @@ template <> LogicalResult mapLinalgOpToLocalInvocationIdImpl<linalg::CopyOp>( linalg::CopyOp copyOp, ArrayRef<Value> operands, - ConversionPatternRewriter &rewriter, bool optimizeControlFlow) { + ConversionPatternRewriter &rewriter) { if (!hasMarker(copyOp, {getCopyToWorkgroupMemoryMarker(), getWorkgroupMarker()})) return failure(); @@ -591,7 +576,7 @@ auto pLoopOp = cast<scf::ParallelOp>(loops.getValue()[0]); if (hasMarker(copyOp, getWorkgroupMarker())) { - return mapToLocalInvocationId(rewriter, pLoopOp, optimizeControlFlow); + return mapToLocalInvocationId(rewriter, pLoopOp); } return distributeCopyOp(copyOp, pLoopOp, rewriter); } @@ -601,15 +586,14 @@ 
template <typename LinalgOpTy> struct MapLinalgOpToLocalInvocationId : public OpConversionPattern<LinalgOpTy> { MapLinalgOpToLocalInvocationId(MLIRContext *context, - bool usingLinalgOnTensorsPath, PatternBenefit benefit = 1) - : OpConversionPattern<LinalgOpTy>(context, benefit), - usingLinalgOnTensorsPath(usingLinalgOnTensorsPath) {} + : OpConversionPattern<LinalgOpTy>(context, benefit) {} + LogicalResult matchAndRewrite( LinalgOpTy linalgOp, ArrayRef<Value> operands, ConversionPatternRewriter &rewriter) const override { - if (failed(mapLinalgOpToLocalInvocationIdImpl(linalgOp, operands, rewriter, - usingLinalgOnTensorsPath))) + if (failed( + mapLinalgOpToLocalInvocationIdImpl(linalgOp, operands, rewriter))) return failure(); // If the `linalgOp` writes to workgroup memory insert barrier after the @@ -626,13 +610,6 @@ rewriter.eraseOp(linalgOp); return success(); } - - private: - /// Flag to signify if Linalg on tensors path is being used. The control flow - /// optimizations implemented on legacy path seems to be failing on this - /// path. Assuming this overhead is not too much, for now just generated the - /// extra loops. 
- bool usingLinalgOnTensorsPath; }; /// Given the workload return the workgroup count along X obtained by @@ -655,10 +632,8 @@ struct MapLinalgOpToGlobalInvocationId : public OpConversionPattern<LinalgOpTy> { MapLinalgOpToGlobalInvocationId(MLIRContext *context, - bool usingLinalgOnTensorsPath, PatternBenefit benefit = 1) - : OpConversionPattern<LinalgOpTy>(context, benefit), - usingLinalgOnTensorsPath(usingLinalgOnTensorsPath) {} + : OpConversionPattern<LinalgOpTy>(context, benefit) {} LogicalResult matchAndRewrite( LinalgOpTy linalgOp, ArrayRef<Value> operands, @@ -685,46 +660,15 @@ workgroupSize = {32, 1, 1}; } } - if (usingLinalgOnTensorsPath) { - WorkgroupCountRegionBuilder regionBuilder = - [&workgroupSize]( - OpBuilder &b, Location loc, - std::array<Value, 3> workload) -> std::array<Value, 3> { - Value one = b.create<ConstantIndexOp>(loc, 1); - return {getWorkgroupCountX(b, loc, workload, workgroupSize[0]), one, - one}; - }; - if (failed(defineWorkgroupCountRegion(rewriter, funcOp, regionBuilder))) { - return failure(); - } - } else { - // TODO (GH-4901): Only support static shapes on this path. This should be - // removed when moved to linalg on tensors. 
- Optional<SmallVector<int64_t, 4>> staticLoopRange = - linalgOp.getStaticLoopRanges(); - if (!staticLoopRange || - llvm::any_of(staticLoopRange.getValue(), [](int64_t d) { - return d == ShapedType::kDynamicSize; - })) { - return linalgOp.emitError("failed to find statlc loop bounds"); - } - ArrayRef<int64_t> parallelLoopRange(staticLoopRange.getValue()); - unsigned numOuterParallel = getNumOuterParallelLoops(linalgOp); - parallelLoopRange = parallelLoopRange.take_front(numOuterParallel); - WorkgroupCountRegionBuilder regionBuilder = - [&parallelLoopRange, &workgroupSize]( - OpBuilder &b, Location loc, - std::array<Value, 3> workload) -> std::array<Value, 3> { - Value one = b.create<ConstantIndexOp>(loc, 1); - auto values = llvm::to_vector<4>( - llvm::map_range(parallelLoopRange, [&](int64_t dim) -> Value { - return b.create<ConstantIndexOp>(loc, dim); - })); - return {getWorkgroupCountX(b, loc, values, workgroupSize[0]), one, one}; - }; - if (failed(defineWorkgroupCountRegion(rewriter, funcOp, regionBuilder))) { - return failure(); - } + WorkgroupCountRegionBuilder regionBuilder = + [&workgroupSize](OpBuilder &b, Location loc, + std::array<Value, 3> workload) { + Value one = b.create<ConstantIndexOp>(loc, 1); + return std::array<Value, 3>{ + getWorkgroupCountX(b, loc, workload, workgroupSize[0]), one, one}; + }; + if (failed(defineWorkgroupCountRegion(rewriter, funcOp, regionBuilder))) { + return failure(); } if (failed(updateWorkGroupSize(funcOp, workgroupSize))) { return failure(); @@ -732,13 +676,6 @@ rewriter.eraseOp(linalgOp); return success(); } - - private: - /// Flag to signify if Linalg on tensors path is being used. This changes the - /// way the number of workgroups is computed. With the linalg on tensors path, - /// the hal.executable.entry_point will be updated to contain a region that - /// gives the number of workgroups to use. - bool usingLinalgOnTensorsPath; }; /// Remove the linalg.range operation created when lowering to loops. 
@@ -843,8 +780,7 @@ MapLinalgOpToLocalInvocationId<linalg::PoolingNHWCMaxFOp>, MapLinalgOpToLocalInvocationId<linalg::PoolingNHWCMinFOp>, MapLinalgOpToLocalInvocationId<linalg::PoolingNHWCSumFOp>, - RemoveLinalgRange, SerializeParallelLoopPattern>( - context, options.usingLinalgOnTensors); + RemoveLinalgRange, SerializeParallelLoopPattern>(context); FrozenRewritePatternSet frozenPatterns(std::move(patterns)); for (FuncOp funcOp : getOperation().getInnerModule().getOps<FuncOp>()) { @@ -860,15 +796,13 @@ } std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createConvertToGPUPass(const SPIRVCodegenOptions &options) { - return std::make_unique<ConvertToGPUPass>(options); +createConvertToGPUPass() { + return std::make_unique<ConvertToGPUPass>(); } static PassRegistration<ConvertToGPUPass> pass( - "iree-codegen-convert-to-gpu", "Map tiled linalg and loop ops to GPU", [] { - SPIRVCodegenOptions options = getSPIRVCodegenOptionsFromClOptions(); - return std::make_unique<ConvertToGPUPass>(options); - }); + "iree-codegen-convert-to-gpu", "Map tiled linalg and loop ops to GPU", + [] { return std::make_unique<ConvertToGPUPass>(); }); } // namespace iree_compiler } // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/FoldGPUProcessorIDUses.cpp b/iree/compiler/Conversion/LinalgToSPIRV/FoldGPUProcessorIDUses.cpp index 1e35b13..53de3b4 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/FoldGPUProcessorIDUses.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/FoldGPUProcessorIDUses.cpp
@@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/LinalgToSPIRV/Passes.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "llvm/ADT/STLExtras.h"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp b/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp index 90b1619..c8e3ae5 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.cpp
@@ -24,7 +24,6 @@ #include "iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.h" #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/Common/LaunchConfig.h" #include "iree/compiler/Conversion/LinalgToSPIRV/Passes.h" #include "iree/compiler/Conversion/LinalgToSPIRV/Utils.h" @@ -203,8 +202,7 @@ const SPIRVCodegenOptions &options, TileSizesListType &tileSizes, LaunchConfigInfo &config) { - if (options.enableVectorization && - succeeded(getMaliSpecificConfig(op, targetEnv, options, tileSizes, + if (succeeded(getMaliSpecificConfig(op, targetEnv, options, tileSizes, config.workgroupSize, config.numSubgroups))) { config.vectorize = true; @@ -337,10 +335,9 @@ // transfer_read ops with permutation maps that we currently cannot lower. // TODO: Remove this restriction once the lowering of the permutation map is // supported in core. - bool vectorize = options.enableVectorization && - llvm::all_of(linalgOp.getIndexingMaps(), [](AffineMap &map) { - return map.isMinorIdentity(); - }); + bool vectorize = llvm::all_of(linalgOp.getIndexingMaps(), [](AffineMap &map) { + return map.isMinorIdentity(); + }); int64_t subgroupSize = targetEnv.getResourceLimits().subgroup_size().getValue().getSExtValue(); config.workgroupSize[0] = subgroupSize; @@ -455,15 +452,13 @@ const SPIRVCodegenOptions &options, TileSizesListType &tileSizes, LaunchConfigInfo &config) { - if (options.enableVectorization && - succeeded(getConfigForCooperativeMatmul(op, targetEnv, options, tileSizes, + if (succeeded(getConfigForCooperativeMatmul(op, targetEnv, options, tileSizes, config.workgroupSize, config.numSubgroups))) { config.vectorize = true; return success(); } - if (options.enableVectorization && - succeeded(getTargetSpecificConfig(op, targetEnv, options, tileSizes, + if (succeeded(getTargetSpecificConfig(op, targetEnv, options, tileSizes, config.workgroupSize, config.numSubgroups))) { 
config.vectorize = true; @@ -575,8 +570,7 @@ const SPIRVCodegenOptions &options, TileSizesListType &tileSizes, LaunchConfigInfo &config) { - if (options.enableVectorization && - targetEnv.getVendorID() == spirv::Vendor::ARM && + if (targetEnv.getVendorID() == spirv::Vendor::ARM && succeeded(getMaliSpecificConfig(op, tileSizes, config))) { return success(); } @@ -587,11 +581,7 @@ const int64_t tileSizeX = 32; int64_t tileSizeY = maxWorkgroupSize / tileSizeX; SmallVector<int64_t, 4> ts; - if (options.usingLinalgOnTensors) { - ts.assign({0, 1, tileSizeY, tileSizeX}); - } else { - ts.assign({1, tileSizeY, tileSizeX}); - } + ts.assign({0, 1, tileSizeY, tileSizeX}); tileSizes.emplace_back(std::move(ts)); config.workgroupSize = {tileSizeX, tileSizeY, 1}; return success(); @@ -689,11 +679,7 @@ const int64_t tileSizeX = 32; int64_t tileSizeY = maxWorkgroupSize / tileSizeX; SmallVector<int64_t, 4> ts; - if (options.usingLinalgOnTensors) { - ts.assign({0, 1, tileSizeY, tileSizeX}); - } else { - ts.assign({1, tileSizeY, tileSizeX}); - } + ts.assign({0, 1, tileSizeY, tileSizeX}); tileSizes.emplace_back(std::move(ts)); config.workgroupSize = {tileSizeX, tileSizeY, 1}; return success(); @@ -711,12 +697,8 @@ const int64_t tileSizeX = 32; int64_t tileSizeY = maxWorkgroupSize / tileSizeX; SmallVector<int64_t, 4> ts; - if (options.usingLinalgOnTensors) { - // There are five parallel loops in depthwise_conv_2d_input_nhwc_filter_hwcf - ts.assign({0, 0, 1, tileSizeY, tileSizeX}); - } else { - ts.assign({1, tileSizeY, tileSizeX}); - } + // There are five parallel loops in depthwise_conv_2d_input_nhwc_filter_hwcf + ts.assign({0, 0, 1, tileSizeY, tileSizeX}); tileSizes.emplace_back(std::move(ts)); config.workgroupSize = {tileSizeX, tileSizeY, 1}; return success(); @@ -737,11 +719,7 @@ const int64_t tileSizeX = 32; int64_t tileSizeY = maxWorkgroupSize / tileSizeX; SmallVector<int64_t, 4> ts; - if (options.usingLinalgOnTensors) { - ts.assign({0, tileSizeY, tileSizeX, 1}); - } else { - 
ts.assign({0, tileSizeY, tileSizeX}); - } + ts.assign({0, tileSizeY, tileSizeX, 1}); tileSizes.emplace_back(std::move(ts)); config.workgroupSize = {tileSizeX, tileSizeY, 1}; return success(); @@ -782,7 +760,7 @@ // Invocation level. launchConfig.setTileSizes(linalgOp.getOperation(), invocationTileSizes, 2); - launchConfig.setVectorize(options.enableVectorization); + launchConfig.setVectorize(true); } SmallVector<int64_t, 3> workgroupSize(options.workgroupSize.begin(), options.workgroupSize.end());
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndDistributePass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndDistributePass.cpp deleted file mode 100644 index ba1333b..0000000 --- a/iree/compiler/Conversion/LinalgToSPIRV/LinalgTileAndDistributePass.cpp +++ /dev/null
@@ -1,155 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//===- LinalgTileAndDistributePass.cpp ------------------------------------===// -// -// This pass tiles and distributes linalg operations among multiple workgroups. -// -// NOTE: Deprecated. This pass is used for the first-level tiling in the Linalg -// on buffers path, which is expected to go away soon. -// -//===----------------------------------------------------------------------===// - -#include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" -#include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" -#include "iree/compiler/Conversion/Common/Transforms.h" -#include "iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h" -#include "iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.h" -#include "iree/compiler/Conversion/LinalgToSPIRV/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/HALDialect.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/Dialect/GPU/GPUDialect.h" -#include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#define DEBUG_TYPE "iree-linalg-to-spirv-tile-and-distribute" - -namespace mlir { 
-namespace iree_compiler { -namespace { - -/// Returns the distribution options for operations when targeting workgroups. -linalg::LinalgLoopDistributionOptions getWorkgroupDistributionOptions() { - linalg::LinalgLoopDistributionOptions options; - - options.procInfo = [](OpBuilder &builder, Location loc, - ArrayRef<Range> parallelLoopRanges) { - return getGPUProcessorIdsAndCounts<gpu::BlockIdOp, gpu::GridDimOp>( - builder, loc, parallelLoopRanges.size()); - }; - options.distributionMethod.assign( - 3, linalg::DistributionMethod::CyclicNumProcsEqNumIters); - - return options; -} - -class LinalgTileAndDistributePass - : public PassWrapper<LinalgTileAndDistributePass, - OperationPass<IREE::HAL::ExecutableTargetOp>> { - public: - LinalgTileAndDistributePass(const SPIRVCodegenOptions &options) - : options(options) {} - LinalgTileAndDistributePass(const LinalgTileAndDistributePass &that) - : options(that.options) {} - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert<AffineDialect, IREE::HAL::HALDialect, linalg::LinalgDialect, - scf::SCFDialect>(); - } - - void runOnOperation() override { - MLIRContext *context = &getContext(); - IREE::HAL::ExecutableTargetOp targetOp = getOperation(); - ModuleOp module = targetOp.getInnerModule(); - - for (FuncOp funcOp : module.getOps<FuncOp>()) { - if (!isEntryPoint(funcOp)) continue; - - SmallVector<linalg::LinalgOp, 4> linalgOps; - SmallVector<Operation *, 4> tiledLoops; - - if (failed(getLinalgOps(funcOp, linalgOps, tiledLoops))) { - // If there are no linalg ops, nothing to do here. - continue; - } - - linalg::Aliases aliases; - linalg::LinalgDependenceGraph dependenceGraph(aliases, linalgOps); - Optional<LaunchConfig> launchConfigOpt = - initGPULaunchConfig(context, dependenceGraph, options, linalgOps); - if (!launchConfigOpt) { - // Having no launch configuration also means nothing to do here. 
- continue; - } - LaunchConfig &launchConfig = *launchConfigOpt; - - LLVM_DEBUG({ - llvm::dbgs() - << "\n--- IREE Linalg tile and distribute configuration ---\n"; - llvm::dbgs() << "@func " << funcOp.getName() - << ": # workgroup sizes: ["; - interleaveComma(launchConfig.getWorkgroupSize(), llvm::dbgs()); - llvm::dbgs() << "]\n"; - for (auto op : linalgOps) { - llvm::dbgs() << "\t" << op.getOperation()->getName() << " : "; - TileSizesListTypeRef tileSizes = launchConfig.getTileSizes(op); - llvm::dbgs() << "{"; - std::string sep = ""; - for (auto &level : enumerate(tileSizes)) { - llvm::dbgs() << sep << level.index() << " : ["; - sep = ", "; - interleaveComma(level.value(), llvm::dbgs()); - llvm::dbgs() << "]"; - } - llvm::dbgs() << "}\n"; - } - }); - TileAndFuseOptions tileAndFuseOptions = { - getWorkgroupDistributionOptions(), allocateWorkgroupMemory}; - if (failed(tileAndFuseLinalgBufferOps(funcOp, linalgOps, dependenceGraph, - launchConfig, - tileAndFuseOptions)) || - failed( - updateWorkGroupSize(funcOp, launchConfig.getWorkgroupSize()))) { - return signalPassFailure(); - } - } - } - - private: - SPIRVCodegenOptions options; -}; - -} // namespace - -std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createTileAndDistributeAmongWorkgroupsPass(const SPIRVCodegenOptions &options) { - return std::make_unique<LinalgTileAndDistributePass>(options); -} - -static PassRegistration<LinalgTileAndDistributePass> pass( - "iree-codegen-spirv-linalg-tile-and-distribute", - "Tile and distribute Linalg operations on buffers", [] { - SPIRVCodegenOptions options = getSPIRVCodegenOptionsFromClOptions(); - return std::make_unique<LinalgTileAndDistributePass>(options); - }); - -} // namespace iree_compiler -} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/MatMulVectorizationTest.cpp b/iree/compiler/Conversion/LinalgToSPIRV/MatMulVectorizationTest.cpp deleted file mode 100644 index 295a48e..0000000 --- a/iree/compiler/Conversion/LinalgToSPIRV/MatMulVectorizationTest.cpp +++ /dev/null
@@ -1,76 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h" -#include "mlir/IR/Builders.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassRegistry.h" - -static llvm::cl::opt<int> wgTileSize( - "iree-codegen-linalg-to-gpu-wg-tile-size", - llvm::cl::desc( - "Specify the size of workgroup tile for matmul vector lowering"), - llvm::cl::init(32)); - -static llvm::cl::list<uint32_t> unrollSize( - "iree-codegen-linalg-to-gpu-unroll-size", - llvm::cl::desc("Specify the size of the "), llvm::cl::CommaSeparated); - -static llvm::cl::opt<bool> enableLICM( - "iree-codegen-linalg-to-gpu-matmul-licm", - llvm::cl::desc( - "If true run LICM and hoisting passes after the staged transforms"), - llvm::cl::init(true)); - -namespace mlir { -namespace iree_compiler { - -namespace { -struct MatMulTileAndVectorizeGPUPass - : PassWrapper<MatMulTileAndVectorizeGPUPass, FunctionPass> { - void runOnFunction() override; -}; -} // namespace - -void MatMulTileAndVectorizeGPUPass::runOnFunction() { - FuncOp fn = getFunction(); - SmallVector<uint32_t, 3> vUnrollSize(unrollSize.begin(), unrollSize.end()); - if (vUnrollSize.size() != 3) signalPassFailure(); - linalg::CodegenStrategy strategy; - strategy - .tile<linalg::MatmulOp>( - linalg::LinalgTilingOptions() - // TODO(thomasraoux): Enable parallel loops once affine.min - // canonicalize supports it. 
- //.setLoopType(linalg::LinalgTilingLoopType::ParallelLoops) - .setTileSizes({wgTileSize, wgTileSize, wgTileSize})) - .setEnableLICM(enableLICM) - .vectorize<linalg::MatmulOp>() - // TODO upstream to the core CodegenStrategy - // .unrollVector<vector::ContractionOp>( - // {vUnrollSize[0], vUnrollSize[1], vUnrollSize[2]}) - ; - strategy.transform(fn); -} - -std::unique_ptr<FunctionPass> createMatMulTileAndVectorizeGPUPass() { - return std::make_unique<MatMulTileAndVectorizeGPUPass>(); -} - -static PassRegistration<MatMulTileAndVectorizeGPUPass> pass( - "iree-codegen-linalg-to-gpu-matmul-vectorization-pass", - "Tile and vectorize linalg.matmul operation", - [] { return std::make_unique<MatMulTileAndVectorizeGPUPass>(); }); - -} // namespace iree_compiler -} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/Passes.cpp b/iree/compiler/Conversion/LinalgToSPIRV/Passes.cpp index 72aa4ae..ae2b6d6 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/Passes.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/Passes.cpp
@@ -83,14 +83,10 @@ // - The Linalg op is kept untouched. // //===--------------------------------------------------------------------===// - if (options.usingLinalgOnTensors) { - // flow.dispatch.workgroups performed abstract tiling and distribution. Make - // them concrete now since we know the target and settings now. - pm.addPass(createConcretizeTileAmongWorkgroupsPass(options)); - } else { - pm.addPass(createSplitDispatchFunctionPass()); - pm.addPass(createTileAndDistributeAmongWorkgroupsPass(options)); - } + + // flow.dispatch.workgroups performed abstract tiling and distribution. Make + // them concrete now since we know the target and settings now. + pm.addPass(createConcretizeTileAmongWorkgroupsPass(options)); pm.addPass(createTileAndVectorizeInOneWorkgroupPass(options)); pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); @@ -103,10 +99,8 @@ // workgroups. // - Linalg ops are converted to loop.for ops and mapped to workitems. //===--------------------------------------------------------------------===// - pm.addPass(createConvertToGPUPass(options)); - if (options.enableVectorization) { - pm.nest<ModuleOp>().addNestedPass<FuncOp>(createVectorToGPUPass()); - } + pm.addPass(createConvertToGPUPass()); + pm.nest<ModuleOp>().addNestedPass<FuncOp>(createVectorToGPUPass()); pm.nest<ModuleOp>().addPass(createLowerAffinePass()); pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); pm.nest<ModuleOp>().addPass(createCSEPass()); @@ -120,29 +114,22 @@ // - Load/store on std.subview ops are converted into load/store on the // original buffers. 
//===--------------------------------------------------------------------===// - if (options.enableVectorization) { - pm.nest<ModuleOp>().addNestedPass<FuncOp>( - createVectorTransferOptimizationPass()); - } + pm.nest<ModuleOp>().addNestedPass<FuncOp>( + createVectorTransferOptimizationPass()); pm.nest<ModuleOp>().addPass(memref::createFoldSubViewOpsPass()); pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); pm.nest<ModuleOp>().addPass(createCSEPass()); - if (options.enableVectorization) { - pm.nest<ModuleOp>().addPass(createVectorizeMemrefLoadStorePass()); - pm.nest<ModuleOp>().addNestedPass<FuncOp>( - createConvertVectorToCooperativeMatrixPass()); - pm.nest<ModuleOp>().addNestedPass<FuncOp>( - createForOpCanonicalizationPass()); - pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); - pm.nest<ModuleOp>().addPass(createCSEPass()); - } + pm.nest<ModuleOp>().addPass(createVectorizeMemrefLoadStorePass()); + pm.nest<ModuleOp>().addNestedPass<FuncOp>( + createConvertVectorToCooperativeMatrixPass()); + pm.nest<ModuleOp>().addNestedPass<FuncOp>(createForOpCanonicalizationPass()); + pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); + pm.nest<ModuleOp>().addPass(createCSEPass()); - if (options.usingLinalgOnTensors) { - pm.nest<ModuleOp>().addNestedPass<FuncOp>(createFlattenMemRefSubspanPass()); - pm.nest<ModuleOp>().addPass(createLowerAffinePass()); - pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); - pm.nest<ModuleOp>().addPass(createCSEPass()); - } + pm.nest<ModuleOp>().addNestedPass<FuncOp>(createFlattenMemRefSubspanPass()); + pm.nest<ModuleOp>().addPass(createLowerAffinePass()); + pm.nest<ModuleOp>().addPass(createCanonicalizerPass()); + pm.nest<ModuleOp>().addPass(createCSEPass()); //===--------------------------------------------------------------------===// // Final conversion to SPIR-V dialect.
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/Passes.h b/iree/compiler/Conversion/LinalgToSPIRV/Passes.h index 7214ed8..82e5fca 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/Passes.h +++ b/iree/compiler/Conversion/LinalgToSPIRV/Passes.h
@@ -37,7 +37,7 @@ /// Pass to add the synchronizations and attributes needed to lower from PLoops /// to GPU dialect. std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createConvertToGPUPass(const SPIRVCodegenOptions &options); +createConvertToGPUPass(); /// Pass to perform the final conversion to SPIR-V dialect. /// This pass converts remaining interface ops into SPIR-V global variables, @@ -45,16 +45,6 @@ /// corresponding SPIR-V ops. std::unique_ptr<OperationPass<ModuleOp>> createConvertToSPIRVPass(); -/// Pass to split computation workload to multiple sequential dispatch -/// functions. This pass operates on Linalg ops and prepares for lowering to -/// GPU, where we need to tile the workload to workgroups and workitems. If the -/// workload involves computation A and B, where B is dependent on A and A needs -/// all workgroups to complete, then we need to split A and B into different -/// kernels because there is no mechanism to perform cross-workgroup -/// synchronization within a single kernel. -std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createSplitDispatchFunctionPass(); - /// Pass to convert vector operations to GPU level operations. Instructions of /// vector size equal to subgroup size are distributed across the subgroup. std::unique_ptr<OperationPass<FuncOp>> createVectorToGPUPass(); @@ -63,9 +53,6 @@ /// cooperative matrix ops when possible. std::unique_ptr<FunctionPass> createConvertVectorToCooperativeMatrixPass(); -/// Pass to apply tiling and vectorization transformations on linagl::MatMulOp. -std::unique_ptr<FunctionPass> createMatMulTileAndVectorizeGPUPass(); - /// Converts memref of scalar to memref of vector of efficent size. This will /// allow to convert memory accesses to vector load/store in SPIR-V without /// having pointer bitcast. 
@@ -86,11 +73,6 @@ std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> createConcretizeTileAmongWorkgroupsPass(const SPIRVCodegenOptions &options); -/// Tiles and distributes Linalg operations on buffers among multiple -/// workgroups. -std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createTileAndDistributeAmongWorkgroupsPass(const SPIRVCodegenOptions &options); - //===----------------------------------------------------------------------===// // Pipelines //===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/SplitDispatchFunctionPass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/SplitDispatchFunctionPass.cpp deleted file mode 100644 index e612d67..0000000 --- a/iree/compiler/Conversion/LinalgToSPIRV/SplitDispatchFunctionPass.cpp +++ /dev/null
@@ -1,330 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//===- SplitDispathFunctionPass.cpp ---------------------------------------===// -// -// This file implements a pass to split computation workload to multiple -// sequential dispatch functions. This pass operates on Linalg ops and -// scf.parallel op and prepares for lowering to GPU, where we need to tile the -// workload to workgroups and workitems. If the workload involves computation A -// and B, where B is dependent on A and A needs all workgroups to complete, then -// we need to split A and B into different kernels because there is no mechanism -// to perform cross-workgroup synchronization within a single kernel. 
-// -//===----------------------------------------------------------------------===// - -#include <iterator> - -#include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" -#include "iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.h" -#include "iree/compiler/Conversion/LinalgToSPIRV/Passes.h" -#include "iree/compiler/Dialect/IREE/IR/IREEOps.h" -#include "iree/compiler/Dialect/Shape/IR/ShapeOps.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" -#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/Attributes.h" -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/RegionUtils.h" - -#define DEBUG_TYPE "split-dispatch-function" - -namespace mlir { -namespace iree_compiler { - -//===----------------------------------------------------------------------===// -// Utility functions -//===----------------------------------------------------------------------===// - -/// Returns true if an op can be fused with the list of ops that are to be put -/// in the same entry point function. This should be consistent with whatthe -/// downstream passes can handle. 
-static bool isFusableWithCurrentOpsList( - Operation *nextOp, ArrayRef<Operation *> currOpsList, - const linalg::LinalgDependenceGraph &dependenceGraph) { - if (currOpsList.empty()) return true; - - linalg::LinalgOp dstOp = dyn_cast<linalg::LinalgOp>(nextOp); - linalg::LinalgOp srcOp = dyn_cast<linalg::LinalgOp>(currOpsList.back()); - if (dstOp && srcOp) { - // TODO(#2963): This splits independent linalg opreations into its own - // dispatch, but in reality if the iteration domain of the ops are the same, - // and they have all iterator types parallel, they could be put in the same - // dispatch region. - if (!dependenceGraph.hasDependenceFrom(srcOp, dstOp)) return false; - -#define ADD_FUSABLE_PAIR(SrcOpTy, DstOpTy, DependenceTy) \ - if (isa<SrcOpTy>(srcOp.getOperation()) && \ - isa<DstOpTy>(dstOp.getOperation()) && \ - dependenceGraph.hasDependenceFrom(srcOp, dstOp, DependenceTy)) \ - return true; - - ADD_FUSABLE_PAIR(linalg::BatchMatmulOp, linalg::GenericOp, - linalg::LinalgDependenceGraph::DependenceType::RAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::BatchMatmulOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::ConvInputNWCFilterWCFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::ConvInputNHWCFilterHWCFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::ConvInputNDHWCFilterDHWCFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::DepthwiseConvInputNHWCFilterHWCOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::DepthwiseConvInputNHWCFilterHWCFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::MatmulOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::BatchMatmulOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) 
- ADD_FUSABLE_PAIR(linalg::FillOp, linalg::PoolingNHWCMaxFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::PoolingNHWCMinFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::FillOp, linalg::PoolingNHWCSumFOp, - linalg::LinalgDependenceGraph::DependenceType::WAW) - ADD_FUSABLE_PAIR(linalg::MatmulOp, linalg::GenericOp, - linalg::LinalgDependenceGraph::DependenceType::RAW) - -#undef ADD_FUSABLE_PAIR - } - return false; -} - -/// For the list of operations in `ops` returns a list of lists where each list -/// contains the operations that need to be put in a separate dispatch function. -static LogicalResult separateOps( - ArrayRef<Operation *> ops, - const linalg::LinalgDependenceGraph &dependenceGraph, - SmallVectorImpl<SmallVector<Operation *, 1>> &fusedOpList) { - assert(!ops.empty() && - "expected at least one separable op for splitting dispatch function"); - SmallVector<Operation *, 1> currList; - for (auto currOp = ops.begin(), nextOp = std::next(ops.begin()); - nextOp != ops.end(); ++currOp, ++nextOp) { - // Check that the operation has buffer semantics. - if (auto linalgOp = dyn_cast<linalg::LinalgOp>(*currOp)) { - if (!linalgOp.hasBufferSemantics()) return failure(); - } - - // Require no other non-metadata ops interleave with Linalg structured ops - // for now. This is the common case and it simplifies further analysis. - Operation *iter = (*currOp)->getNextNode(); - while (iter != *nextOp && (MemoryEffectOpInterface::hasNoEffect(iter) || - isa<IREE::PlaceholderOp>(iter))) - iter = iter->getNextNode(); - if (iter != *nextOp) return failure(); - - currList.push_back(*currOp); - - // If the nextOp is not fusible with the currOp, then record the list of ops - // so far, and start a new list. - if (isFusableWithCurrentOpsList(*nextOp, currList, dependenceGraph)) { - continue; - } - - // Push the current list of ops into the list of lists `currList` and - // start a new list. 
- fusedOpList.emplace_back(); - std::swap(fusedOpList.back(), currList); - } - currList.push_back(ops.back()); - fusedOpList.emplace_back(std::move(currList)); - return success(); -} - -/// Recursively collects all the operations that are referenced by given -/// `rootOp` into `closure`. -static void collectAllReferencedOps( - ArrayRef<Operation *> rootOps, - llvm::SmallPtrSetImpl<Operation *> &closure) { - llvm::SmallVector<Operation *, 8> workList; - workList.assign(rootOps.begin(), rootOps.end()); - - while (!workList.empty()) { - Operation *curOp = workList.pop_back_val(); - if (!curOp) continue; - if (!closure.insert(curOp).second) continue; // Seen before - // Collect all defining ops for operands. - for (Value operand : curOp->getOperands()) { - if (Operation *owner = operand.getDefiningOp()) workList.push_back(owner); - } - // Collect all defining ops for the values used in regions. - for (Region ®ion : curOp->getRegions()) { - visitUsedValuesDefinedAbove(region, [&workList](OpOperand *operand) { - workList.push_back(operand->get().getDefiningOp()); - }); - } - } -} - -//===----------------------------------------------------------------------===// -// Pass and patterns -//===----------------------------------------------------------------------===// - -namespace { - -struct SplitDispatchFunctionPass - : public PassWrapper<SplitDispatchFunctionPass, - OperationPass<IREE::HAL::ExecutableTargetOp>> { - void runOnOperation() override; - LogicalResult splitDispatchFunction(FuncOp oldFn, OpBuilder &builder); -}; - -} // namespace - -void SplitDispatchFunctionPass::runOnOperation() { - IREE::HAL::ExecutableTargetOp targetOp = getOperation(); - ModuleOp moduleOp = targetOp.getInnerModule(); - - // Collect all dispatch entry functions. 
- SmallVector<FuncOp, 1> functions; - for (FuncOp fn : moduleOp.getOps<FuncOp>()) { - if (isEntryPoint(fn)) functions.push_back(fn); - } - if (functions.empty()) return; - if (functions.size() > 1) { - moduleOp.emitError("expected only one entry function"); - return signalPassFailure(); - } - - auto builder = OpBuilder::atBlockBegin(moduleOp.getBody()); - if (failed(splitDispatchFunction(functions.front(), builder))) { - return signalPassFailure(); - } -} - -LogicalResult SplitDispatchFunctionPass::splitDispatchFunction( - FuncOp oldFn, OpBuilder &builder) { - // Entry functions are supported to be of `void(void)` type. - assert(oldFn.getType().getNumInputs() == 0 && - oldFn.getType().getNumResults() == 0); - - if (!llvm::hasSingleElement(oldFn.getBlocks())) { - return oldFn.emitError("expected only one block"); - } - IREE::HAL::ExecutableEntryPointOp oldEntryPointOp = getEntryPoint(oldFn); - if (!oldEntryPointOp) { - return oldFn.emitError("unable to find iree.executable.entry_point for ") - << oldFn.getName(); - } - // The dispatch function should have more than one separable ops. Otherwise - // there is nothing to do. - Block &fnBody = oldFn.getBlocks().front(); - - // Collect all Linalg and scf.parallel ops for splitting. 
- SmallVector<Operation *, 4> separableOps; - for (Operation &op : fnBody) - if (isa<linalg::LinalgOp, scf::ParallelOp, scf::ForOp>(op)) - separableOps.push_back(&op); - - if (separableOps.size() <= 1) return success(); - - linalg::Aliases aliases; - linalg::LinalgDependenceGraph dependenceGraph = - linalg::LinalgDependenceGraph::buildDependenceGraph(aliases, oldFn); - SmallVector<SmallVector<Operation *, 1>, 1> fusedOpsList; - if (failed(separateOps(separableOps, dependenceGraph, fusedOpsList))) { - return oldFn.emitError( - "cannot separate Linalg/Parallel ops into multiple kernels"); - } - if (fusedOpsList.size() <= 1) return success(); - - ModuleOp moduleOp = cast<ModuleOp>(oldFn->getParentOp()); - Block &oldFnBlock = oldFn.getBlocks().front(); - Location loc = oldFn.getLoc(); - SmallVector<Attribute, 4> entryPoints; - - for (const auto &fusedOps : llvm::enumerate(fusedOpsList)) { - if (fusedOps.value().empty()) continue; - // Create a new function for hosting this op. - std::string newFnName = - llvm::formatv("{0}_dispatch_{1}", oldFn.getName(), fusedOps.index()); - builder.setInsertionPointToStart(moduleOp.getBody()); - auto newFn = builder.create<FuncOp>(loc, newFnName, oldFn.getType()); - LLVM_DEBUG({ - llvm::dbgs() << "Created new function : func @" << newFn.getName() - << "\n"; - }); - - // Copy over all attributes except type and name. - for (const auto &namedAttr : oldFn->getAttrs()) { - if (namedAttr.first != impl::getTypeAttrName() && - namedAttr.first != SymbolTable::getSymbolAttrName()) - newFn->setAttr(namedAttr.first, namedAttr.second); - } - - // Add the entry point operations for the new fn. 
- { - OpBuilder::InsertionGuard g(builder); - builder.setInsertionPoint(oldEntryPointOp); - auto clonedEntryPointOp = cast<IREE::HAL::ExecutableEntryPointOp>( - builder.clone(*oldEntryPointOp.getOperation())); - clonedEntryPointOp.sym_nameAttr(builder.getStringAttr(newFnName)); - clonedEntryPointOp.ordinalAttr( - builder.getIndexAttr(static_cast<int32_t>(entryPoints.size()))); - entryPoints.push_back(builder.getSymbolRefAttr(clonedEntryPointOp)); - } - - // Collect the closure for the current Linalg op. - llvm::SmallPtrSet<Operation *, 16> closure; - collectAllReferencedOps(fusedOps.value(), closure); - - // Clone all ops in the closure to the new function. - Block *newFnBlock = newFn.addEntryBlock(); - builder.setInsertionPointToStart(newFnBlock); - BlockAndValueMapping remapper; - for (Operation &op : oldFnBlock) { - if (closure.count(&op) == 0) continue; - builder.insert(op.clone(remapper)); - if (&op == fusedOps.value().back()) break; - } - builder.insert(oldFnBlock.getTerminator()->clone(remapper)); - } - moduleOp->setAttr(getEntryPointScheduleAttrName(), - builder.getArrayAttr(entryPoints)); - - LLVM_DEBUG({ llvm::dbgs() << "Erased func @" << oldFn.getName() << "\n"; }); - oldFn.erase(); - oldEntryPointOp.erase(); - return success(); -} - -//===----------------------------------------------------------------------===// -// Pass entry point and registration -//===----------------------------------------------------------------------===// - -std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>> -createSplitDispatchFunctionPass() { - return std::make_unique<SplitDispatchFunctionPass>(); -} - -static PassRegistration<SplitDispatchFunctionPass> pass( - "iree-codegen-split-dispatch-function", - "Split workload to multiple dispatch functions to satisfy computation " - "dependency for GPU lowering"); - -} // namespace iree_compiler -} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/TileAndVectorizeInOneWorkgroupPass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/TileAndVectorizeInOneWorkgroupPass.cpp index 6e7a712..8fc4463 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/TileAndVectorizeInOneWorkgroupPass.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/TileAndVectorizeInOneWorkgroupPass.cpp
@@ -22,7 +22,6 @@ #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h" #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h" #include "iree/compiler/Conversion/CodegenUtils/TransformUtils.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/Common/Transforms.h" #include "iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h" #include "iree/compiler/Conversion/LinalgToSPIRV/KernelDispatchUtils.h"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/VectorizeMemrefLoadStorePass.cpp b/iree/compiler/Conversion/LinalgToSPIRV/VectorizeMemrefLoadStorePass.cpp index 00ae4c9..499c38e 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/VectorizeMemrefLoadStorePass.cpp +++ b/iree/compiler/Conversion/LinalgToSPIRV/VectorizeMemrefLoadStorePass.cpp
@@ -418,8 +418,9 @@ } }; -class VectorizeMemRefPass final - : public PassWrapper<VectorizeMemRefPass, OperationPass<ModuleOp>> { +class VectorizeMemRefLoadStorePass final + : public PassWrapper<VectorizeMemRefLoadStorePass, + OperationPass<ModuleOp>> { void runOnOperation() override; private: @@ -455,7 +456,7 @@ return success(); } -void VectorizeMemRefPass::runOnOperation() { +void VectorizeMemRefLoadStorePass::runOnOperation() { // Uses the signature conversion methodology of the dialect conversion // framework to implement the conversion. ModuleOp module = getOperation(); @@ -504,10 +505,10 @@ } std::unique_ptr<OperationPass<ModuleOp>> createVectorizeMemrefLoadStorePass() { - return std::make_unique<VectorizeMemRefPass>(); + return std::make_unique<VectorizeMemRefLoadStorePass>(); } -static PassRegistration<VectorizeMemRefPass> pass( +static PassRegistration<VectorizeMemRefLoadStorePass> pass( "iree-spirv-vectorize-memref-load-store", "Vectorize interface memrefs and their load/store for better memory " "access");
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/BUILD b/iree/compiler/Conversion/LinalgToSPIRV/test/BUILD index 2ad8790..090dab0 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/BUILD +++ b/iree/compiler/Conversion/LinalgToSPIRV/test/BUILD
@@ -36,11 +36,9 @@ "forop_canonicalization.mlir", "materialize_launch_configuration.mlir", "materialize_launch_configuration2.mlir", - "matmul_vectorization_licm.mlir", "pipeline_matmul_cooperative_matrix.mlir", "pipeline_matmul_vectorization.mlir", "promote_workgroup_memory.mlir", - "split_dispatch_function.mlir", "tile_and_vectorize_batch_matmul.mlir", "tile_and_vectorize_conv.mlir", "tile_and_vectorize_matmul.mlir",
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/CMakeLists.txt b/iree/compiler/Conversion/LinalgToSPIRV/test/CMakeLists.txt index 2d60bba..b53451a 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/CMakeLists.txt +++ b/iree/compiler/Conversion/LinalgToSPIRV/test/CMakeLists.txt
@@ -23,11 +23,9 @@ "forop_canonicalization.mlir" "materialize_launch_configuration.mlir" "materialize_launch_configuration2.mlir" - "matmul_vectorization_licm.mlir" "pipeline_matmul_cooperative_matrix.mlir" "pipeline_matmul_vectorization.mlir" "promote_workgroup_memory.mlir" - "split_dispatch_function.mlir" "tile_and_vectorize_batch_matmul.mlir" "tile_and_vectorize_conv.mlir" "tile_and_vectorize_matmul.mlir"
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration.mlir index ba4a9e7..6cf8c8f 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration.mlir +++ b/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration.mlir
@@ -1,4 +1,4 @@ -// RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.target(iree-spirv-concretize-tile-among-workgroups))" -iree-codegen-spirv-experimental-linalg-on-tensors -cse -canonicalize -split-input-file %s | IreeFileCheck %s +// RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.target(iree-spirv-concretize-tile-among-workgroups))" -canonicalize -cse -split-input-file %s | IreeFileCheck %s hal.executable @matmul_tensors attributes {sym_visibility = "private"} { hal.interface @io {
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration2.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration2.mlir index 30a1b85..81b3b9d 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration2.mlir +++ b/iree/compiler/Conversion/LinalgToSPIRV/test/materialize_launch_configuration2.mlir
@@ -1,4 +1,4 @@ -// RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.target(iree-codegen-convert-to-gpu))" -iree-codegen-spirv-experimental-linalg-on-tensors -cse -canonicalize -split-input-file %s | IreeFileCheck %s +// RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.target(iree-codegen-convert-to-gpu))" -canonicalize -cse -split-input-file %s | IreeFileCheck %s hal.executable @add attributes {sym_visibility = "private"} { hal.interface @io {
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/matmul_vectorization_licm.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/matmul_vectorization_licm.mlir deleted file mode 100644 index 223fe2b..0000000 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/matmul_vectorization_licm.mlir +++ /dev/null
@@ -1,21 +0,0 @@ -// RUN: iree-opt --iree-codegen-linalg-to-gpu-matmul-vectorization-pass -// RUN: -split-input-file %s --iree-codegen-linalg-to-gpu-unroll-size=8,8,32 \ -// RUN: -iree-codegen-linalg-to-gpu-matmul-licm | IreeFileCheck %s - -// CHECK-LABEL: func @matmul_128x128x128 -// CHECK-SAME: (%[[ARG0:.+]]: memref<128x128xf32>, %[[ARG1:.+]]: memref<128x128xf32>, %[[ARG2:.+]]: memref<128x128xf32>) -func @matmul_128x128x128(%arg0 : memref<128x128xf32>, %arg1: memref<128x128xf32>, %arg2: memref<128x128xf32>) { - linalg.matmul ins(%arg0, %arg1 : memref<128x128xf32>, memref<128x128xf32>) outs(%arg2 : memref<128x128xf32>) - return -} - -// CHECK-DAG: %[[TILESIZE:.+]] = constant 32 : index -// CHECK-DAG: %[[MATSIZE:.+]] = constant 128 : index -// CHECK-DAG: %[[START:.+]] = constant 0 : index -// CHECK: scf.for %[[IL:.+]] = %[[START]] to %[[MATSIZE]] step %[[TILESIZE]] -// CHECK: scf.for %[[JL:.+]] = %[[START]] to %[[MATSIZE]] step %[[TILESIZE]] -// CHECK: %[[SUBVVIEWC:.+]] = memref.subview %[[ARG2]][%[[IL]], %[[JL]]] [32, 32] [1, 1] : memref<128x128xf32> to memref<32x32xf32 -// CHECK: scf.for %[[KL:.+]] = %[[START]] to %[[MATSIZE]] step %[[TILESIZE]] -// CHECK: %[[SUBVVIEWA:.+]] = memref.subview %[[ARG0]][%[[IL]], %[[KL]]] [32, 32] [1, 1] : memref<128x128xf32> to memref<32x32xf32 -// CHECK: %[[SUBVVIEWB:.+]] = memref.subview %[[ARG1]][%[[KL]], %[[JL]]] [32, 32] [1, 1] : memref<128x128xf32> to memref<32x32xf32 -
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir deleted file mode 100644 index 977d0c3..0000000 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir +++ /dev/null
@@ -1,692 +0,0 @@ -// RUN: iree-opt -allow-unregistered-dialect -split-input-file -pass-pipeline='hal.executable(hal.executable.target(iree-codegen-split-dispatch-function))' -verify-diagnostics %s | IreeFileCheck %s - -hal.executable @kernel_fusable_fill_conv1d_ops attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_fill_conv1d_ops attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x3x512xf32>, !flow.dispatch.tensor<readonly:3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x512xf32>) -> ()} - module { - // CHECK: func @kernel_fusable_fill_conv1d_ops - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.conv_1d_input_nwc_filter_wcf - // CHECK: return - - func @kernel_fusable_fill_conv1d_ops() { - %cst = constant 0.000000e+00 : f32 - %dim = hal.interface.load.constant offset = 0 : index - %shape1 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,3,512]> - %shape2 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,1,512]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x3x512xf32>, !shapex.ranked_shape<[?,3,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x512xf32> - %ts2 = shapex.tie_shape %2, %shape2 : memref<?x1x512xf32>, !shapex.ranked_shape<[?,1,512]> - linalg.fill(%ts2, %cst) : memref<?x1x512xf32>, f32 - linalg.conv_1d_input_nwc_filter_wcf { - dilations = dense<1> : 
tensor<1xi64>, - strides = dense<2> : tensor<1xi64>} - ins(%ts1, %1 : memref<?x3x512xf32>, memref<3x512x1xf32>) - outs(%ts2 : memref<?x1x512xf32>) - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// ----- - -hal.executable @kernel_fusable_fill_conv2d_ops attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_fill_conv2d_ops attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x3x3x512xf32>, !flow.dispatch.tensor<readonly:3x3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x1x512xf32>) -> ()} - module { - // CHECK: func @kernel_fusable_fill_conv2d_ops - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.conv_2d_input_nhwc_filter_hwcf - // CHECK: return - - func @kernel_fusable_fill_conv2d_ops() { - %cst = constant 0.000000e+00 : f32 - %dim = hal.interface.load.constant offset = 0 : index - %shape1 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,3,3,512]> - %shape2 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,1,1,512]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x3x3x512xf32>, !shapex.ranked_shape<[?,3,3,512]> - %1 = iree.placeholder for "interface buffer" {binding = 
@io::@arg1} : memref<3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - %ts2 = shapex.tie_shape %2, %shape2 : memref<?x1x1x512xf32>, !shapex.ranked_shape<[?,1,1,512]> - linalg.fill(%ts2, %cst) : memref<?x1x1x512xf32>, f32 - linalg.conv_2d_input_nhwc_filter_hwcf { - dilations = dense<1> : tensor<2xi64>, - strides = dense<2> : tensor<2xi64>} - ins(%ts1, %1 : memref<?x3x3x512xf32>, memref<3x3x512x1xf32>) - outs(%ts2 : memref<?x1x1x512xf32>) - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -hal.executable @kernel_fusable_fill_conv3d_ops attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_fill_conv3d_ops attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x3x3x3x512xf32>, !flow.dispatch.tensor<readonly:3x3x3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x1x1x512xf32>) -> ()} - module { - // CHECK: func @kernel_fusable_fill_conv3d_ops - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.conv_3d_input_ndhwc_filter_dhwcf - // CHECK: return - - func @kernel_fusable_fill_conv3d_ops() { - %cst = constant 0.000000e+00 : f32 - %dim = hal.interface.load.constant offset = 0 : index - %shape1 = shapex.make_ranked_shape %dim : (index) -> 
!shapex.ranked_shape<[?,3,3,3,512]> - %shape2 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,1,1,1,512]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x3x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x3x3x3x512xf32>, !shapex.ranked_shape<[?,3,3,3,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x1x512xf32> - %ts2 = shapex.tie_shape %2, %shape2 : memref<?x1x1x1x512xf32>, !shapex.ranked_shape<[?,1,1,1,512]> - linalg.fill(%ts2, %cst) : memref<?x1x1x1x512xf32>, f32 - linalg.conv_3d_input_ndhwc_filter_dhwcf { - dilations = dense<1> : tensor<3xi64>, - strides = dense<2> : tensor<3xi64>} - ins(%ts1, %1 : memref<?x3x3x3x512xf32>, memref<3x3x3x512x1xf32>) - outs(%ts2 : memref<?x1x1x1x512xf32>) - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -hal.executable @kernel_fusable_fill_matmul_ops attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_fill_matmul_ops attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x512xf32>, !flow.dispatch.tensor<readonly:512x?xf32>, - !flow.dispatch.tensor<writeonly:?x?xf32>) -> ()} - 
module { - // CHECK: func @kernel_fusable_fill_matmul_ops - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.matmul - // CHECK: return - - func @kernel_fusable_fill_matmul_ops() { - %cst = constant 0.000000e+00 : f32 - %dimM = hal.interface.load.constant offset = 0 : index - %dimN = hal.interface.load.constant offset = 1 : index - %shape1 = shapex.make_ranked_shape %dimM : (index) -> !shapex.ranked_shape<[?,512]> - %shape2 = shapex.make_ranked_shape %dimN : (index) -> !shapex.ranked_shape<[512,?]> - %shape3 = shapex.make_ranked_shape %dimM, %dimN : (index, index) -> !shapex.ranked_shape<[?,?]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x512xf32>, !shapex.ranked_shape<[?,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<512x?xf32> - %ts2 = shapex.tie_shape %1, %shape2 : memref<512x?xf32>, !shapex.ranked_shape<[512, ?]> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x?xf32> - %ts3 = shapex.tie_shape %2, %shape3 : memref<?x?xf32>, !shapex.ranked_shape<[?,?]> - linalg.fill(%ts3, %cst) : memref<?x?xf32>, f32 - linalg.matmul ins(%ts1, %ts2 : memref<?x512xf32>, memref<512x?xf32>) - outs(%ts3 : memref<?x?xf32>) - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -hal.executable @kernel_fusable_pooling attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, 
set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_pooling attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x?xf32>, !flow.dispatch.tensor<readonly:?x?x?x?xf32>, - !flow.dispatch.tensor<writeonly:?x?x?x?xf32>) -> ()} - module { - // CHECK: func @kernel_fusable_pooling() - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.pooling_nhwc_sum - // CHECK: return - func @kernel_fusable_pooling() { - %cst = constant 0.000000e+00 : f32 - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x?xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<?x?x?x?xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x?x?x?xf32> - linalg.fill(%2, %cst) : memref<?x?x?x?xf32>, f32 - linalg.pooling_nhwc_sum - {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} - ins(%1, %0: memref<?x?x?x?xf32>, memref<?x?xf32>) - outs(%2: memref<?x?x?x?xf32>) - return - } - hal.interface @io attributes {sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -hal.executable @kernel attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel attributes { - interface = @io, ordinal = 0 : index, - signature = 
(!flow.dispatch.tensor<readonly:?x3x3x512xf32>, !flow.dispatch.tensor<readonly:3x3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x1x512xf32>) -> ()} - // CHECK: hal.executable.entry_point @kernel_dispatch_0 - // CHECK: hal.executable.entry_point @kernel_dispatch_1 - // CHECK: module attributes {hal.entry_point_schedule = [@kernel_dispatch_0, @kernel_dispatch_1]} - module { - // CHECK: func @kernel_dispatch_1() - // CHECK: %[[ZERO:.+]] = constant - // CHECK: %[[DIM:.+]] = hal.interface.load.constant - // CHECK: %[[SHAPE:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - // CHECK: %[[TS:.+]] = shapex.tie_shape %[[OUT]], %[[SHAPE]] - // CHECK: linalg.fill(%[[TS]], %[[ZERO]]) - // CHECK: return - - // CHECK: func @kernel_dispatch_0() - // CHECK: %[[DIM:.+]] = hal.interface.load.constant - // CHECK: %[[SHAPE1:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[SHAPE2:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[IN1:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x512xf32> - // CHECK: %[[TS1:.+]] = shapex.tie_shape %[[IN1]], %[[SHAPE1]] - // CHECK: %[[IN2:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - // CHECK: %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - // CHECK: %[[TS2:.+]] = shapex.tie_shape %[[OUT]], %[[SHAPE2]] - // CHECK: linalg.conv_2d_input_nhwc_filter_hwcf - // CHECK-SAME: ins(%[[TS1]], %[[IN2]] : memref<?x3x3x512xf32>, memref<3x3x512x1xf32>) - // CHECK-SAME: outs(%[[TS2]] : memref<?x1x1x512xf32>) - // CHECK: return - - func @kernel() { - %cst = constant 0.000000e+00 : f32 - %dim = hal.interface.load.constant offset = 0 : index - %shape1 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,3,3,512]> - %shape2 = shapex.make_ranked_shape %dim : (index) -> 
!shapex.ranked_shape<[?,1,1,512]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x3x3x512xf32>, !shapex.ranked_shape<[?,3,3,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - %ts2 = shapex.tie_shape %2, %shape2 : memref<?x1x1x512xf32>, !shapex.ranked_shape<[?,1,1,512]> - linalg.conv_2d_input_nhwc_filter_hwcf { - dilations = dense<1> : tensor<2xi64>, - strides = dense<2> : tensor<2xi64>} - ins(%ts1, %1 : memref<?x3x3x512xf32>, memref<3x3x512x1xf32>) - outs(%ts2 : memref<?x1x1x512xf32>) - linalg.fill(%ts2, %cst) : memref<?x1x1x512xf32>, f32 - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -hal.executable @kernel attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x3x3x512xf32>, !flow.dispatch.tensor<readonly:3x3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x1x512xf32>) -> ()} - // CHECK: hal.executable.entry_point @kernel_dispatch_0 - // CHECK: hal.executable.entry_point @kernel_dispatch_1 - // CHECK: hal.executable.entry_point 
@kernel_dispatch_2 - // CHECK: module attributes {hal.entry_point_schedule = [@kernel_dispatch_0, @kernel_dispatch_1, @kernel_dispatch_2]} - module { - // CHECK: func @kernel_dispatch_2() - // CHECK: %[[DIM:.+]] = hal.interface.load.constant - // CHECK: %[[SHAPE1:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[SHAPE2:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[IN1:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x512xf32> - // CHECK: %[[TS1:.+]] = shapex.tie_shape %[[IN1]], %[[SHAPE1]] - // CHECK: %[[IN2:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - // CHECK: %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - // CHECK: %[[TS2:.+]] = shapex.tie_shape %[[OUT]], %[[SHAPE2]] - // CHECK: linalg.conv_2d_input_nhwc_filter_hwcf - // CHECK-SAME: ins(%[[TS1]], %[[IN2]] : memref<?x3x3x512xf32>, memref<3x3x512x1xf32>) - // CHECK-SAME: outs(%[[TS2]] : memref<?x1x1x512xf32>) - // CHECK: return - - // CHECK: func @kernel_dispatch_1() - // CHECK: %[[C0:.+]] = constant 0 : index - // CHECK: %[[C1:.+]] = constant 1 : index - // CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[C1]]) step (%[[C1]]) - // CHECK: scf.yield - // CHECK: return - - // CHECK: func @kernel_dispatch_0() - // CHECK: %[[ZERO:.+]] = constant - // CHECK: %[[DIM:.+]] = hal.interface.load.constant - // CHECK: %[[SHAPE:.+]] = shapex.make_ranked_shape %[[DIM]] - // CHECK: %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - // CHECK: %[[TS:.+]] = shapex.tie_shape %[[OUT]], %[[SHAPE]] - // CHECK: linalg.fill(%[[TS]], %[[ZERO]]) - // CHECK: return - - func @kernel() { - %cst = constant 0.000000e+00 : f32 - %c0 = constant 0 : index - %c1 = constant 1 : index - %dim = hal.interface.load.constant offset = 0 : index - %shape1 = shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,3,3,512]> - %shape2 = 
shapex.make_ranked_shape %dim : (index) -> !shapex.ranked_shape<[?,1,1,512]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x3x3x512xf32> - %ts1 = shapex.tie_shape %0, %shape1 : memref<?x3x3x512xf32>, !shapex.ranked_shape<[?,3,3,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x1x1x512xf32> - %ts2 = shapex.tie_shape %2, %shape2 : memref<?x1x1x512xf32>, !shapex.ranked_shape<[?,1,1,512]> - linalg.fill(%ts2, %cst) : memref<?x1x1x512xf32>, f32 - scf.parallel (%iv) = (%c0) to (%c1) step (%c1) { - scf.yield - } - linalg.conv_2d_input_nhwc_filter_hwcf { - dilations = dense<1> : tensor<2xi64>, - strides = dense<2> : tensor<2xi64>} - ins(%ts1, %1 : memref<?x3x3x512xf32>, memref<3x3x512x1xf32>) - outs(%ts2 : memref<?x1x1x512xf32>) - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- - -// Nothing to do if there is just one Linalg op. 
- -hal.executable @kernel attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:1x3x3x512xf32>, !flow.dispatch.tensor<readonly:3x3x512x1xf32>, - !flow.dispatch.tensor<writeonly:1x1x1x1xf32>) -> ()} - // CHECK-NOT: hal.entry_point_schedule - module { - // CHECK-LABEL: @kernel() - func @kernel() attributes {hal.num_workgroups_fn = @kernel__num_workgroups__} { - %cst = constant 0.000000e+00 : f32 - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<1x3x3x512xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<1x1x1x1xf32> - linalg.conv_2d_input_nhwc_filter_hwcf { - dilations = dense<1> : tensor<2xi64>, - strides = dense<2> : tensor<2xi64>} - ins(%0, %1 : memref<1x3x3x512xf32>, memref<3x3x512x1xf32>) - outs(%2 : memref<1x1x1x1xf32>) - return - } - // CHECK-LABEL: @kernel__num_workgroups__ - hal.interface @io attributes {sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - - - -// ----- - -// Do not split when Linalg and non-Linalg ops are interleaving each other. 
- -hal.executable @kernel attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x3x512xf32>, !flow.dispatch.tensor<readonly:3x512x1xf32>, - !flow.dispatch.tensor<writeonly:?x1x1xf32>) -> ()} - module { - // expected-error @+1 {{cannot separate Linalg/Parallel ops into multiple kernels}} - func @kernel() { - %cst = constant 0.000000e+00 : f32 - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<1x3x3x512xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<3x3x512x1xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<1x1x1x1xf32> - linalg.fill(%2, %cst) : memref<1x1x1x1xf32>, f32 - "some_op"() : () -> () - linalg.conv_2d_input_nhwc_filter_hwcf { - dilations = dense<1> : tensor<2xi64>, - strides = dense<2> : tensor<2xi64>} - ins(%0, %1 : memref<1x3x3x512xf32>, memref<3x3x512x1xf32>) - outs(%2 : memref<1x1x1x1xf32>) - return - } - hal.interface @io attributes {sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// ----- -#map0 = affine_map<(d0, d1) -> (d0 * 12 + d1 + 53)> - -hal.executable @subview_interleaved attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - 
hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @subview_interleaved attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:18x12xf32>, !flow.dispatch.tensor<writeonly:18x12xf32>) -> ()} - module { - func @subview_interleaved() { - %cst = constant 0.000000e+00 : f32 - %0 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<18x12xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<18x12xf32> - linalg.fill(%0, %cst) : memref<18x12xf32>, f32 - %2 = memref.subview %0[4, 5] [18, 12] [1, 1] : memref<18x12xf32> to memref<18x12xf32, #map0> - linalg.copy(%1, %2) : memref<18x12xf32>, memref<18x12xf32, #map0> - return - } - hal.interface @io attributes {sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write" - } - } - } -} - -// CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0 * 12 + d1 + 53)> -// CHECK-DAG: hal.executable.entry_point @subview_interleaved_dispatch_0 -// CHECK-DAG: hal.executable.entry_point @subview_interleaved_dispatch_1 -// CHECK: module attributes {hal.entry_point_schedule = -// CHECK-SAME: [@subview_interleaved_dispatch_0, @subview_interleaved_dispatch_1]} -// CHECK: func @subview_interleaved_dispatch_1() -// CHECK-DAG: %[[DST:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<18x12xf32> -// CHECK-DAG: %[[SRC:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<18x12xf32> -// CHECK: %[[SUB:.+]] = memref.subview %[[DST]][4, 5] [18, 12] [1, 1] : memref<18x12xf32> to memref<18x12xf32, #[[MAP0]]> -// CHECK: linalg.copy(%[[SRC]], %[[SUB]]) : memref<18x12xf32>, memref<18x12xf32, #[[MAP0]]> -// CHECK: return -// CHECK: func 
@subview_interleaved_dispatch_0() -// CHECK: %[[CST:.+]] = constant -// CHECK: %[[DST2:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<18x12xf32> -// CHECK: linalg.fill(%[[DST2]], %[[CST]]) : memref<18x12xf32>, f32 -// CHECK: return - -// ----- - -#map0 = affine_map<(d0, d1) -> (d0, d1)> -#map1 = affine_map<(d0, d1, d2) -> (d0, d1)> -#map2 = affine_map<(d0, d1, d2) -> (d2)> - -hal.executable @reshape_interleaved attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard" - hal.interface.binding @ret1, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @reshape_interleaved attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:2x4xf32>, !flow.dispatch.tensor<writeonly:1x2x4xf32>, - !flow.dispatch.tensor<writeonly:2x4xf32>) -> ()} - module { - func @reshape_interleaved() { - %0 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<2x4xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@ret1} : memref<1x2x4xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<2x4xf32> - linalg.generic {indexing_maps = [#map0, #map0], - iterator_types = ["parallel", "parallel"]} - ins(%2 : memref<2x4xf32>) - outs(%0 : memref<2x4xf32>) { - ^bb0(%arg0: f32, %arg1: f32): // no predecessors - %4 = math.tanh %arg0 : f32 - linalg.yield %4 : f32 - } - %3 = linalg.reshape %0 [#map1, #map2] : memref<2x4xf32> into memref<1x2x4xf32> - linalg.copy(%3, %1) : memref<1x2x4xf32>, memref<1x2x4xf32> - return - } - hal.interface @io attributes {sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, 
set=0, binding=1, type="StorageBuffer", access="Write|Discard" - hal.interface.binding @ret1, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK-DAG: hal.executable.entry_point @reshape_interleaved_dispatch_0 -// CHECK-DAG: hal.executable.entry_point @reshape_interleaved_dispatch_1 -// CHECK: module attributes {hal.entry_point_schedule = -// CHECK-SAME: [@reshape_interleaved_dispatch_0, @reshape_interleaved_dispatch_1]} -// CHECK: func @reshape_interleaved_dispatch_1() -// CHECK: %[[SRC1:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<2x4xf32> -// CHECK: %[[DST:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret1} : memref<1x2x4xf32> -// CHECK: %[[SRC2:.+]] = linalg.reshape %[[SRC1]] [#[[MAP0]], #[[MAP1]]] : memref<2x4xf32> into memref<1x2x4xf32> -// CHECK: linalg.copy(%[[SRC2]], %[[DST]]) : memref<1x2x4xf32>, memref<1x2x4xf32> -// CHECK: return -// CHECK: func @reshape_interleaved_dispatch_0() -// CHECK: %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<2x4xf32> -// CHECK: %[[IN:.+]] = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<2x4xf32> -// CHECK: linalg.generic -// CHECK-SAME: ins(%[[IN]] : -// CHECK-SAME: outs(%[[OUT]] : - -// ----- - -hal.executable @predict_ex_dispatch_0 attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - hal.interface.binding @ret1, set=0, binding=3, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, 
filter="vulkan*" { - hal.executable.entry_point @predict_ex_dispatch_0 attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:1x512x1xf32>, !flow.dispatch.tensor<readonly:4x8x16xf32>, - !flow.dispatch.tensor<writeonly:4x8x16xf32>, !flow.dispatch.tensor<writeonly:4x8x16xf32>) -> ()} - module { - func @predict_ex_dispatch_0() { - %0 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<1x512x1xf32> - %1 = iree.placeholder for "interface buffer" {binding = @io::@ret1} : memref<4x8x16xf32> - %2 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<1x512x1xf32> - linalg.copy(%2, %0) : memref<1x512x1xf32>, memref<1x512x1xf32> - %3 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<4x8x16xf32> - linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (-d0 + 3, d1, d2)>, - affine_map<(d0, d1, d2) -> (d0, d1, d2)>], - iterator_types = ["parallel", "parallel", "parallel"]} - ins(%3 : memref<4x8x16xf32>) - outs(%1 : memref<4x8x16xf32>) { - ^bb0(%arg0: f32, %arg1: f32): // no predecessors - linalg.yield %arg0 : f32 - } - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-DAG: hal.executable.entry_point @predict_ex_dispatch_0_dispatch_0 -// CHECK-DAG: hal.executable.entry_point @predict_ex_dispatch_0_dispatch_1 -// CHECK: module attributes {hal.entry_point_schedule = -// CHECK-SAME: [@predict_ex_dispatch_0_dispatch_0, @predict_ex_dispatch_0_dispatch_1]} -// CHECK: func @predict_ex_dispatch_0_dispatch_1 -// CHECK-NEXT: iree.placeholder -// CHECK-SAME: binding = @io::@ret1 -// CHECK-NEXT: iree.placeholder -// CHECK-SAME: binding = 
@io::@arg1 -// CHECK-NEXT: linalg.generic -// CHECK: linalg.yield -// CHECK-NOT: linalg -// CHECK: return -// CHECK: func @predict_ex_dispatch_0_dispatch_0 -// CHECK-NEXT: iree.placeholder -// CHECK-SAME: binding = @io::@ret0 -// CHECK-NEXT: iree.placeholder -// CHECK-SAME: binding = @io::@arg0 -// CHECK-NEXT: linalg.copy -// CHECK-NOT: linalg -// CHECK: return - -// ----- - -hal.executable @kernel_fusable_fill_matmul_generic_ops attributes {sym_visiblity = "private"} { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @arg2, set=0, binding=2, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=3, type="StorageBuffer", access="Write|Discard" - } - hal.executable.target @vulkan, filter="vulkan*" { - hal.executable.entry_point @kernel_fusable_fill_matmul_generic_ops attributes { - interface = @io, ordinal = 0 : index, - signature = (!flow.dispatch.tensor<readonly:?x512xf32>, !flow.dispatch.tensor<readonly:512x?xf32>, - !flow.dispatch.tensor<readonly:?x?xf32>, !flow.dispatch.tensor<writeonly:?x?xf32>) -> ()} - module { - // CHECK: func @kernel_fusable_fill_matmul_generic_ops - // CHECK: linalg.fill - // CHECK-NOT: return - // CHECK: linalg.matmul - // CHECK-NOT: return - // CHECK: linalg.generic - // CHECK: return - - func @kernel_fusable_fill_matmul_generic_ops() { - %cst = constant 0.000000e+00 : f32 - %dimM = hal.interface.load.constant offset = 0 : index - %dimN = hal.interface.load.constant offset = 1 : index - %shape1 = shapex.make_ranked_shape %dimM : (index) -> !shapex.ranked_shape<[?,512]> - %shape2 = shapex.make_ranked_shape %dimN : (index) -> !shapex.ranked_shape<[512,?]> - %shape3 = shapex.make_ranked_shape %dimM, %dimN : (index, index) -> !shapex.ranked_shape<[?,?]> - %0 = iree.placeholder for "interface buffer" {binding = @io::@arg0} : memref<?x512xf32> - %ts0 = 
shapex.tie_shape %0, %shape1 : memref<?x512xf32>, !shapex.ranked_shape<[?,512]> - %1 = iree.placeholder for "interface buffer" {binding = @io::@arg1} : memref<512x?xf32> - %ts1 = shapex.tie_shape %1, %shape2 : memref<512x?xf32>, !shapex.ranked_shape<[512, ?]> - %2 = iree.placeholder for "interface buffer" {binding = @io::@arg2} : memref<?x?xf32> - %ts2 = shapex.tie_shape %2, %shape3 : memref<?x?xf32>, !shapex.ranked_shape<[?, ?]> - %3 = iree.placeholder for "interface buffer" {binding = @io::@ret0} : memref<?x?xf32> - %ts3 = shapex.tie_shape %3, %shape3 : memref<?x?xf32>, !shapex.ranked_shape<[?,?]> - %4 = memref.alloc(%dimM, %dimN) : memref<?x?xf32> - %ts4 = shapex.tie_shape %4, %shape3 : memref<?x?xf32>, !shapex.ranked_shape<[?,?]> - linalg.fill(%ts4, %cst) : memref<?x?xf32>, f32 - linalg.matmul ins(%ts0, %ts1 : memref<?x512xf32>, memref<512x?xf32>) - outs(%ts4 : memref<?x?xf32>) - linalg.generic - {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, - affine_map<(d0, d1) -> (d0, d1)>, - affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"]} - ins(%ts2, %ts4 : memref<?x?xf32>, memref<?x?xf32>) - outs(%ts3 : memref<?x?xf32>) { - ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32): - %5 = addf %arg0, %arg1 : f32 - linalg.yield %5 : f32 - } - return - } - hal.interface @io attributes {push_constants = 1 : index, sym_visibility = "private"} { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @arg2, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -}
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/tile_and_vectorize_conv.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/tile_and_vectorize_conv.mlir index 2a821bc..3eeae63 100644 --- a/iree/compiler/Conversion/LinalgToSPIRV/test/tile_and_vectorize_conv.mlir +++ b/iree/compiler/Conversion/LinalgToSPIRV/test/tile_and_vectorize_conv.mlir
@@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -pass-pipeline="hal.executable(hal.executable.target(iree-spirv-concretize-tile-among-workgroups,iree-spirv-tile-and-vectorize-in-one-workgroup))" -iree-spirv-enable-vectorization -iree-codegen-spirv-experimental-linalg-on-tensors -canonicalize -cse %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline="hal.executable(hal.executable.target(iree-spirv-concretize-tile-among-workgroups,iree-spirv-tile-and-vectorize-in-one-workgroup))" -canonicalize -cse %s | IreeFileCheck %s hal.executable @conv_static_shape_f32 attributes {sym_visibility = "private"} { hal.interface @io {
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h index 9220eb6..874adf6 100644 --- a/iree/compiler/Conversion/init_conversions.h +++ b/iree/compiler/Conversion/init_conversions.h
@@ -64,13 +64,10 @@ inline void registerLinalgToSPIRVPasses() { static bool init_once = []() { // LinalgToSPIRV - createConvertToGPUPass(SPIRVCodegenOptions()); + createConvertToGPUPass(); createFoldProcessorIDUsesPass(); - createTileAndDistributeAmongWorkgroupsPass(SPIRVCodegenOptions()); createTileAndVectorizeInOneWorkgroupPass(SPIRVCodegenOptions()); - createSplitDispatchFunctionPass(); createVectorToGPUPass(); - createMatMulTileAndVectorizeGPUPass(); createVectorizeMemrefLoadStorePass(); return true; }();
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp index 629a2e5..62418fc 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -16,7 +16,6 @@ #include <cstdlib> -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h" #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h" #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp index 3bb9b7f..eb684b4 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h" #include "iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" @@ -79,18 +78,9 @@ // names for constructing pipeline states. Get an ordered list of the entry // point names. SmallVector<StringRef, 8> entryPointNames; - if (auto scheduleAttr = innerModuleOp->getAttrOfType<ArrayAttr>( - iree_compiler::getEntryPointScheduleAttrName())) { - // We have multiple entry points in this module. Make sure the order - // specified in the schedule attribute is respected. - for (Attribute entryPoint : scheduleAttr) { - entryPointNames.push_back(entryPoint.cast<StringAttr>().getValue()); - } - } else { - spvModuleOp.walk([&](spirv::EntryPointOp entryPointOp) { - entryPointNames.push_back(entryPointOp.fn()); - }); - } + spvModuleOp.walk([&](spirv::EntryPointOp entryPointOp) { + entryPointNames.push_back(entryPointOp.fn()); + }); // 1. Serialize the spirv::ModuleOp into binary format. SmallVector<uint32_t, 0> spvBinary;
diff --git a/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.cpp index 65c035d..86dee04 100644 --- a/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Dialect/Shape/IR/ShapeOps.h" @@ -28,15 +27,6 @@ namespace IREE { namespace HAL { -// Records a full execution barrier that forces visibility of all buffers. -static void recordFullExecutionBarrier(Value commandBuffer, Location loc, - OpBuilder &builder) { - builder.create<IREE::HAL::CommandBufferExecutionBarrierOp>( - loc, commandBuffer, IREE::HAL::ExecutionStageBitfield::Dispatch, - IREE::HAL::ExecutionStageBitfield::Dispatch, - IREE::HAL::ExecutionBarrierFlagBitfield::None); -} - SPIRVTargetBackend::SPIRVTargetBackend(SPIRVCodegenOptions options) : spvCodeGenOptions_(std::move(options)) {} @@ -61,170 +51,6 @@ buildSPIRVTransformPassPipeline(passManager, spvCodeGenOptions_); } -LogicalResult SPIRVTargetBackend::recordDispatch( - Location loc, DispatchState dispatchState, - DeviceSwitchRewriter &switchRewriter) { - // TODO(#4140): remove this legacy path when linalg-on-tensors is used. - // In the linalg-on-tensors world where we are performing the tiling logic - // in the flow dialect we don't even really need the ability to override - // dispatch recording at all - just a way to allow targets to map workgroup - // counts from the N-dimensional flow workgroup counts to the 3D hal counts. - if (dispatchState.workgroupCount.size() == 3) { - return TargetBackend::recordDispatch(loc, dispatchState, switchRewriter); - } - - // Multiple entry points might be generated for a single dispatch function. - // Under such circumstances, we will have a special attribute indicating the - // schedule of the split entry points. Try to see if we can find such - // schedule attribute first. 
- ArrayAttr entryPointScheduleAttr; - spirv::ModuleOp spvModuleOp; - IREE::HAL::ExecutableOp executableOp = dispatchState.executableOp; - for (auto executableTargetOp : - executableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>()) { - if (matchPattern(executableTargetOp.target_backend_filter(), - filter_pattern())) { - ModuleOp innerModuleOp = executableTargetOp.getInnerModule(); - auto spvModuleOps = innerModuleOp.getOps<spirv::ModuleOp>(); - assert(llvm::hasSingleElement(spvModuleOps)); - spvModuleOp = *spvModuleOps.begin(); - entryPointScheduleAttr = innerModuleOp->getAttrOfType<ArrayAttr>( - iree_compiler::getEntryPointScheduleAttrName()); - if (!spvModuleOp) - return executableOp.emitError("unable to find spv.module"); - - SmallVector<IREE::HAL::ExecutableEntryPointOp, 2> entryPoints; - if (!entryPointScheduleAttr) { - entryPoints = llvm::to_vector<2>( - executableTargetOp.getOps<IREE::HAL::ExecutableEntryPointOp>()); - if (!llvm::hasSingleElement(entryPoints)) { - return executableTargetOp.emitError( - "expected a single entry point, found ") - << entryPoints.size(); - } - } else { - SymbolTable symTable(executableTargetOp); - for (Attribute entryPointAttr : entryPointScheduleAttr) { - auto entryPointOp = - symTable.lookup<IREE::HAL::ExecutableEntryPointOp>( - entryPointAttr.cast<FlatSymbolRefAttr>().getValue()); - if (!entryPointOp) { - return executableTargetOp.emitError( - "unable to find hal.executable.entry_point operation " - "for ") - << entryPointAttr.cast<FlatSymbolRefAttr>().getValue(); - } - entryPoints.push_back(entryPointOp); - } - } - - auto *region = switchRewriter.addConditionRegion( - IREE::HAL::DeviceMatchIDAttr::get(filter_pattern(), loc.getContext()), - { - dispatchState.workgroupCount[0], - dispatchState.commandBuffer, - }); - - auto &entryBlock = region->front(); - ConversionPatternRewriter &rewriter = switchRewriter.getRewriter(); - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToEnd(&entryBlock); - auto 
workload = entryBlock.getArgument(0); - auto commandBuffer = entryBlock.getArgument(1); - - // We have multiple entry points to dispatch. Record in the order - // specified by entry point schedule and insert barrier between sequential - // ones. - for (auto entryPoint : llvm::enumerate(entryPoints)) { - std::array<Value, 3> workgroupCount = calculateDispatchWorkgroupCount( - loc, executableOp, entryPoint.value(), workload, rewriter); - - if (llvm::any_of(workgroupCount, - [](Value v) -> bool { return v == nullptr; })) { - return entryPoint.value().emitError("unable to find workgroup count"); - } - - // Ordinals are fixed based on the precomputed schedule, so use - // CommandBufferDispatchOp instead of CommandBufferDispatchSymbolOp. - auto executable = rewriter - .create<IREE::HAL::ExecutableLookupOp>( - loc, dispatchState.device, - dispatchState.dispatchOp.executable()) - .getResult(); - int32_t entryPointOrdinal = entryPoint.index(); - rewriter.create<IREE::HAL::CommandBufferDispatchOp>( - loc, commandBuffer, executable, - rewriter.getIndexAttr(entryPointOrdinal), workgroupCount[0], - workgroupCount[1], workgroupCount[2]); - if (entryPoint.index() + 1 != entryPoints.size()) { - recordFullExecutionBarrier(commandBuffer, loc, rewriter); - } - } - rewriter.create<IREE::HAL::ReturnOp>(loc); - } - } - return success(); -} - -// Finds the spv.ExecutionMode operation to get the workgroup size from. -// TODO(ravishankarm): This might not be the only way this is specified. You -// could also have a spec constant, but that is not generated in the -// spv.module right now. -// TODO(ravishankarm): change workgroup size calculation to something we can -// query independently so that we don't need to lookup the value here. 
-std::array<Value, 3> SPIRVTargetBackend::calculateDispatchWorkgroupSize( - Location loc, IREE::HAL::ExecutableOp executableOp, - IREE::HAL::ExecutableEntryPointOp entryPointOp, ValueRange workload, - OpBuilder &builder) { - // TODO(ravishankarm): possibly emit different recordDispatch logic if the - // workgroup sizes differ among targets. - spirv::ModuleOp spvModuleOp; - for (auto executableTargetOp : - executableOp.getBlock().getOps<IREE::HAL::ExecutableTargetOp>()) { - if (matchPattern(executableTargetOp.target_backend_filter(), - filter_pattern())) { - ModuleOp innerModuleOp = executableTargetOp.getInnerModule(); - assert(!innerModuleOp->getAttr( - iree_compiler::getEntryPointScheduleAttrName())); - auto spvModuleOps = innerModuleOp.getOps<spirv::ModuleOp>(); - assert(llvm::hasSingleElement(spvModuleOps)); - spvModuleOp = *spvModuleOps.begin(); - break; - } - } - return calculateDispatchWorkgroupSize( - loc, spvModuleOp, entryPointOp.sym_name(), workload, builder); -} - -std::array<Value, 3> SPIRVTargetBackend::calculateDispatchWorkgroupSize( - Location loc, spirv::ModuleOp spvModuleOp, StringRef entryPointName, - ValueRange workload, OpBuilder &builder) { - std::array<Value, 3> workgroupSize; - for (auto executionModeOp : - spvModuleOp.getBlock().getOps<spirv::ExecutionModeOp>()) { - if (executionModeOp.fn() == entryPointName && - executionModeOp.execution_mode() == spirv::ExecutionMode::LocalSize) { - for (int i = 0; i < executionModeOp.values().size(); ++i) { - workgroupSize[i] = - builder.create<ConstantIndexOp>(loc, executionModeOp.values()[i] - .cast<IntegerAttr>() - .getValue() - .getZExtValue()); - } - break; - } - } - - // Pad out the workgroup size with 1's (if the original rank was < 3). - for (int i = 0; i < workgroupSize.size(); ++i) { - if (!workgroupSize[i]) { - workgroupSize[i] = builder.create<ConstantIndexOp>(loc, 1); - } - } - - return workgroupSize; -} - } // namespace HAL } // namespace IREE } // namespace iree_compiler
diff --git a/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h b/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h index ea3389c..9378c56 100644 --- a/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h +++ b/iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h
@@ -38,20 +38,6 @@ void buildTranslationPassPipeline(OpPassManager &passManager) override; - LogicalResult recordDispatch(Location loc, DispatchState dispatchState, - DeviceSwitchRewriter &switchRewriter) override; - - // Finds the spv.ExecutionMode operation to get the workgroup size from. - std::array<Value, 3> calculateDispatchWorkgroupSize( - Location loc, IREE::HAL::ExecutableOp executableOp, - IREE::HAL::ExecutableEntryPointOp entryPointOp, ValueRange workload, - OpBuilder &builder) override; - - private: - std::array<Value, 3> calculateDispatchWorkgroupSize( - Location loc, spirv::ModuleOp spvModuleOp, StringRef entryPointName, - ValueRange workload, OpBuilder &builder); - SPIRVCodegenOptions spvCodeGenOptions_; };
diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp index e1a556e..30499ab 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -14,7 +14,6 @@ #include "iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h" -#include "iree/compiler/Conversion/Common/Attributes.h" #include "iree/compiler/Conversion/LinalgToSPIRV/CodeGenOptionUtils.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/SPIRVCommon/SPIRVTarget.h" @@ -144,19 +143,9 @@ // list of entry point names here that are then passed in // VkShaderModuleCreateInfo. SmallVector<StringRef, 8> entryPointNames; - if (auto scheduleAttr = innerModuleOp->getAttrOfType<ArrayAttr>( - iree_compiler::getEntryPointScheduleAttrName())) { - // We have multiple entry points in this module. Make sure the order - // specified in the schedule attribute is respected. - for (Attribute entryPoint : scheduleAttr) { - entryPointNames.push_back( - entryPoint.cast<FlatSymbolRefAttr>().getValue()); - } - } else { - spvModuleOp.walk([&](spirv::EntryPointOp entryPointOp) { - entryPointNames.push_back(entryPointOp.fn()); - }); - } + spvModuleOp.walk([&](spirv::EntryPointOp entryPointOp) { + entryPointNames.push_back(entryPointOp.fn()); + }); auto entryPointsRef = builder.createStringVec(entryPointNames); iree_SpirVExecutableDef_entry_points_add(builder, entryPointsRef);
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp index 4bfa50a..e611f52 100644 --- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp +++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
@@ -268,13 +268,11 @@ Value listOperand = op.getOperation()->getOperand(listArgumentIndex); // deref - auto refOp = rewriter.create<emitc::CallOp>( + auto refOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t"), - /*callee=*/rewriter.getStringAttr("*"), - /*args=*/ArrayAttr{}, - /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{listOperand}); + /*applicableOperator=*/rewriter.getStringAttr("*"), + /*operand=*/listOperand); auto listDerefOp = rewriter.create<emitc::CallOp>( /*location=*/loc, @@ -282,7 +280,7 @@ /*callee=*/rewriter.getStringAttr("iree_vm_list_deref"), /*args=*/ArrayAttr{}, /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{refOp.getResult(0)}); + /*operands=*/ArrayRef<Value>{refOp.getResult()}); rewriter.create<emitc::CallOp>( /*location=*/loc, @@ -386,9 +384,10 @@ /*templateArgs=*/ArrayAttr{}, /*operands=*/ArrayRef<Value>{}); - auto elementTypePtrOp = rewriter.create<emitc::GetAddressOfOp>( + auto elementTypePtrOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*result=*/emitc::OpaqueType::get(ctx, "iree_vm_type_def_t*"), + /*applicableOperator=*/rewriter.getStringAttr("&"), /*operand=*/elementTypeOp.getResult(0)); auto listOp = rewriter.create<emitc::ConstOp>( @@ -396,9 +395,10 @@ /*resultType=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"), /*value=*/StringAttr::get(ctx, "NULL")); - auto listPtrOp = rewriter.create<emitc::GetAddressOfOp>( + auto listPtrOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*result=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t**"), + /*applicableOperator=*/rewriter.getStringAttr("&"), /*operand=*/listOp.getResult()); failableCall( @@ -497,18 +497,17 @@ /*resultType=*/emitc::OpaqueType::get(ctx, "iree_vm_value_t"), /*value=*/StringAttr::get(ctx, "")); - auto valuePtrOp = rewriter.create<emitc::GetAddressOfOp>( + auto valuePtrOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*result=*/emitc::OpaqueType::get(ctx, 
"iree_vm_value_t*"), + /*applicableOperator=*/rewriter.getStringAttr("&"), /*operand=*/valueOp.getResult()); - auto refOp = rewriter.create<emitc::CallOp>( + auto refOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t"), - /*callee=*/rewriter.getStringAttr("*"), - /*args=*/ArrayAttr{}, - /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{getOp.list()}); + /*applicableOperator=*/rewriter.getStringAttr("*"), + /*operand=*/getOp.list()); auto listDerefOp = rewriter.create<emitc::CallOp>( /*location=*/loc, @@ -516,7 +515,7 @@ /*callee=*/rewriter.getStringAttr("iree_vm_list_deref"), /*args=*/ArrayAttr{}, /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{refOp.getResult(0)}); + /*operands=*/ArrayRef<Value>{refOp.getResult()}); rewriter.create<emitc::CallOp>( /*location=*/loc, @@ -584,18 +583,17 @@ /*templateArgs=*/ArrayAttr{}, /*operands=*/ArrayRef<Value>{setOp.value()}); - auto valuePtrOp = rewriter.create<emitc::GetAddressOfOp>( + auto valuePtrOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*result=*/emitc::OpaqueType::get(ctx, "iree_vm_value_t*"), + /*applicableOperator=*/rewriter.getStringAttr("&"), /*operand=*/valueOp.getResult(0)); - auto refOp = rewriter.create<emitc::CallOp>( + auto refOp = rewriter.create<emitc::ApplyOp>( /*location=*/loc, /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t"), - /*callee=*/rewriter.getStringAttr("*"), - /*args=*/ArrayAttr{}, - /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{setOp.list()}); + /*applicableOperator=*/rewriter.getStringAttr("*"), + /*operand=*/setOp.list()); auto listDerefOp = rewriter.create<emitc::CallOp>( /*location=*/loc, @@ -603,7 +601,7 @@ /*callee=*/rewriter.getStringAttr("iree_vm_list_deref"), /*args=*/ArrayAttr{}, /*templateArgs=*/ArrayAttr{}, - /*operands=*/ArrayRef<Value>{refOp.getResult(0)}); + /*operands=*/ArrayRef<Value>{refOp.getResult()}); rewriter.create<emitc::CallOp>( /*location=*/loc,
diff --git a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir index cd85e9d..f8da971 100644 --- a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir +++ b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
@@ -1,7 +1,7 @@ -// RUN: iree-opt -iree-codegen-spirv-experimental-linalg-on-tensors -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv %s | IreeFileCheck %s -check-prefix=DEFAULT -// RUN: iree-opt -iree-codegen-spirv-experimental-linalg-on-tensors -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=qualcomm-adreno640-unknown-android10 %s | IreeFileCheck %s -check-prefix=ADRENO640 -// RUN: iree-opt -iree-codegen-spirv-experimental-linalg-on-tensors -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=valhall-g77-unknown-android10 %s | IreeFileCheck %s -check-prefix=MALIG77 -// RUN: iree-opt -iree-codegen-spirv-experimental-linalg-on-tensors -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=turing-t4-unknown-linux %s | IreeFileCheck %s -check-prefix=TURINGT4 +// RUN: iree-opt -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv %s | IreeFileCheck %s -check-prefix=DEFAULT +// RUN: iree-opt -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=qualcomm-adreno640-unknown-android10 %s | IreeFileCheck %s -check-prefix=ADRENO640 +// RUN: iree-opt -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=valhall-g77-unknown-android10 %s | IreeFileCheck %s -check-prefix=MALIG77 +// RUN: iree-opt -pass-pipeline='iree-hal-transformation-pipeline{serialize-executables=false}' -iree-hal-target-backends=vulkan-spirv -iree-vulkan-target-triple=turing-t4-unknown-linux %s | IreeFileCheck %s 
-check-prefix=TURINGT4 // TODO(antiagainst): Passing in lenghty strings as command-line options is not // optimal. We should consider creating a dedicated test pass to pick up
diff --git a/iree/hal/local/elf/platform/apple.c b/iree/hal/local/elf/platform/apple.c index 78b7acb..ae7aeeb 100644 --- a/iree/hal/local/elf/platform/apple.c +++ b/iree/hal/local/elf/platform/apple.c
@@ -29,6 +29,17 @@ #include <sys/mman.h> #include <unistd.h> +// MAP_JIT and related utilities are only available on MacOS 11.0+. +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0 +#define IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0(expr) \ + if (__builtin_available(macOS 11.0, *)) { \ + expr \ + } +#else +#define IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0(expr) +#endif // MAC_OS_VERSION_11_0 + //============================================================================== // Memory subsystem information and control //============================================================================== @@ -44,9 +55,21 @@ out_info->can_allocate_executable_pages = true; } -void iree_memory_jit_context_begin() { pthread_jit_write_protect_np(0); } +void iree_memory_jit_context_begin() { + IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({ + if (pthread_jit_write_protect_supported_np()) { + pthread_jit_write_protect_np(0); + } + }); +} -void iree_memory_jit_context_end() { pthread_jit_write_protect_np(1); } +void iree_memory_jit_context_end() { + IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({ + if (pthread_jit_write_protect_supported_np()) { + pthread_jit_write_protect_np(1); + } + }); +} //============================================================================== // Virtual address space manipulation @@ -71,9 +94,11 @@ int mmap_prot = PROT_NONE; int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE; - if (flags & IREE_MEMORY_VIEW_FLAG_EXECUTE) { - mmap_flags |= MAP_JIT; - } + IREE_APPLE_IF_AT_LEAST_MAC_OS_11_0({ + if (flags & IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE) { + mmap_flags |= MAP_JIT; + } + }); iree_status_t status = iree_ok_status(); void* base_address =
diff --git a/third_party/mlir-emitc b/third_party/mlir-emitc index 3c265bf..679d718 160000 --- a/third_party/mlir-emitc +++ b/third_party/mlir-emitc
@@ -1 +1 @@ -Subproject commit 3c265bf59bf2515a63ec35571c66954349749a62 +Subproject commit 679d7183b657a24f48d16de1fcefb20d7cd1f6a2