Drop usage of LLVMCodegenOptions. (#6084)
The use of LLVM style command line flags makes it harder to print out
reproducers or have a proper string representation of the pass
pipeline. It is better to use MLIR wrappers around these options. With
dynamic pass pipelines, there is a place to anchor these options.
Some of the options are now moved into specific passes, like the
unfusedFMAOps option, which is now part of the ConvertToLLVM pass.
Some unnecessary tests are deleted.
Also enable use of vectorization pipeline on the VMVX backend. Since
the VMVX backend cannot handle vectors or vector operations, add a
flag to avoid vectorization but still do the tiling for the different
cache levels.
Fixes #5925
diff --git a/iree/compiler/Conversion/LinalgToLLVM/BUILD b/iree/compiler/Conversion/LinalgToLLVM/BUILD
index 18241c8..1fe6525 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/BUILD
+++ b/iree/compiler/Conversion/LinalgToLLVM/BUILD
@@ -15,7 +15,6 @@
srcs = [
"ConvertToLLVM.cpp",
"KernelDispatch.cpp",
- "LLVMCodeGenOptions.cpp",
"LinalgTileAndVectorizePass.cpp",
"LinalgVectorizePass.cpp",
"LowerExecutableTargetPass.cpp",
@@ -26,7 +25,6 @@
],
hdrs = [
"KernelDispatch.h",
- "LLVMCodeGenOptions.h",
"Passes.h",
],
deps = [
diff --git a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
index 2fdfd52..5a8318e 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -15,12 +15,10 @@
LinalgToLLVM
HDRS
"KernelDispatch.h"
- "LLVMCodeGenOptions.h"
"Passes.h"
SRCS
"ConvertToLLVM.cpp"
"KernelDispatch.cpp"
- "LLVMCodeGenOptions.cpp"
"LinalgTileAndVectorizePass.cpp"
"LinalgVectorizePass.cpp"
"LowerExecutableTargetPass.cpp"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
index 3a4645c..b7354c1 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
@@ -5,7 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h"
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
#include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
@@ -594,8 +593,10 @@
class ConvertToLLVMPass
: public PassWrapper<ConvertToLLVMPass, OperationPass<ModuleOp>> {
public:
- ConvertToLLVMPass(LLVMCodegenOptions options) : options_(options) {}
-
+ ConvertToLLVMPass(bool unfuseFMA = false) { unfuseFMAOps = unfuseFMA; }
+ ConvertToLLVMPass(const ConvertToLLVMPass &pass) {
+ unfuseFMAOps = pass.unfuseFMAOps;
+ }
void getDependentDialects(DialectRegistry ®istry) const override {
registry.insert<LLVM::LLVMDialect>();
}
@@ -603,7 +604,10 @@
void runOnOperation() override;
private:
- LLVMCodegenOptions options_;
+ Option<bool> unfuseFMAOps{
+ *this, "unfuse-fma-ops",
+ llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+ llvm::cl::init(false)};
};
} // namespace
@@ -710,7 +714,7 @@
// Post conversion patterns.
{
OwningRewritePatternList postPatterns(&getContext());
- if (options_.unfuseFMAOps) {
+ if (unfuseFMAOps) {
populateUnfusedFMAOpsPassPatterns(&getContext(), postPatterns);
(void)applyPatternsAndFoldGreedily(module, std::move(postPatterns));
}
@@ -718,18 +722,15 @@
}
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
- LLVMCodegenOptions options) {
- return std::make_unique<ConvertToLLVMPass>(options);
+ bool unfuseFMAOps) {
+ return std::make_unique<ConvertToLLVMPass>(unfuseFMAOps);
}
static PassRegistration<ConvertToLLVMPass> pass(
"iree-codegen-convert-to-llvm",
"Perform final conversion from Linalg/HAL/Shape/Vector/Standard to "
"LLVMIR dialect",
- [] {
- return std::make_unique<ConvertToLLVMPass>(
- getLLVMCodegenOptionsFromClOptions());
- });
+ [] { return std::make_unique<ConvertToLLVMPass>(); });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
deleted file mode 100644
index b38838b..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
-
-#include "llvm/Support/CommandLine.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-static llvm::cl::opt<bool> clConvImg2ColConversion(
- "iree-codegen-linalg-to-llvm-conv-img2col-conversion",
- llvm::cl::desc("Enable rewriting linalg.conv_2d_input_nhwc_filter_hwcf "
- "linalg.generic that does img2col buffer packing + "
- "linag.matmul"),
- llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clUnfusedFMA(
- "iree-codegen-linalg-to-llvm-use-unfused-fma",
- llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
- llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clEnableLinalgOnTensorsToVectors(
- "iree-codegen-linalg-to-llvm-linalg-on-tensors-to-vectors",
- llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
- llvm::cl::init(false));
-
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions() {
- LLVMCodegenOptions options;
- options.useConvImg2Col = clConvImg2ColConversion;
- options.unfuseFMAOps = clUnfusedFMA;
- options.useLinalgOnTensorsToVectors = clEnableLinalgOnTensorsToVectors;
- return options;
-}
-
-} // namespace iree_compiler
-} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
deleted file mode 100644
index b9fe214..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-#define IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-
-#include "llvm/ADT/SmallVector.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Options used to configure LLVM passes.
-struct LLVMCodegenOptions {
- bool useConvImg2Col = false;
- // Target specific options.
- bool unfuseFMAOps = false;
- bool useVectorToAarch64 = false;
- bool useLinalgOnTensorsToVectors = false;
-};
-
-// Returns LLVM CodeGen options from command-line options.
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions();
-
-} // namespace iree_compiler
-} // namespace mlir
-
-#endif // IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
index 65f83b5..fce276c 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
@@ -68,11 +68,24 @@
namespace {
struct TileAndVectorizeWorkgroups
: public PassWrapper<TileAndVectorizeWorkgroups, FunctionPass> {
+ TileAndVectorizeWorkgroups(bool vectorize = true)
+ : lowerToVectors(vectorize) {}
+ TileAndVectorizeWorkgroups(const TileAndVectorizeWorkgroups &pass) {
+ lowerToVectors = pass.lowerToVectors;
+ }
void getDependentDialects(DialectRegistry ®istry) const override {
registry.insert<linalg::LinalgDialect, AffineDialect, scf::SCFDialect,
vector::VectorDialect>();
}
void runOnFunction() override;
+
+ private:
+ /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+ /// VMVX backend uses the same lowering as the CPU pass but there is no
+ /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+ /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+ bool lowerToVectors;
};
} // namespace
@@ -209,6 +222,10 @@
}
}
+ if (!lowerToVectors) {
+ return;
+ }
+
// Apply vectorization patterns.
{
OwningRewritePatternList vectorizationPatterns(&getContext());
@@ -281,8 +298,9 @@
}
}
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass() {
- return std::make_unique<TileAndVectorizeWorkgroups>();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+ bool lowerToVectors) {
+ return std::make_unique<TileAndVectorizeWorkgroups>(lowerToVectors);
}
static PassRegistration<TileAndVectorizeWorkgroups> pass(
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
index 0354a35..d307ea2 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
@@ -32,9 +32,12 @@
LLVM::LLVMDialect>();
}
- LowerExecutableTargetPass(LLVMCodegenOptions options) : options(options) {}
- LowerExecutableTargetPass(const LowerExecutableTargetPass &pass)
- : options(pass.options) {}
+ LowerExecutableTargetPass(bool vectorize = true)
+ : lowerToVectors(vectorize) {}
+ LowerExecutableTargetPass(const LowerExecutableTargetPass &pass) {
+ invokeLoweringPipelines = pass.invokeLoweringPipelines;
+ lowerToVectors = pass.lowerToVectors;
+ }
void runOnOperation() override;
@@ -47,7 +50,12 @@
"can be set to false for testing purposes."),
llvm::cl::init(true)};
- LLVMCodegenOptions options;
+ /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+ /// VMVX backend uses the same lowering as the CPU pass but there is no
+ /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+ /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+ bool lowerToVectors;
};
} // namespace
@@ -64,19 +72,19 @@
OpPassManager executableLoweringPipeline(
IREE::HAL::ExecutableTargetOp::getOperationName());
executableLoweringPipeline.addPass(createSetNumWorkgroupsPass());
+ OpPassManager &nestedModulePM = executableLoweringPipeline.nest<ModuleOp>();
if (invokeLoweringPipelines) {
IREE::HAL::DispatchLoweringPassPipeline passPipeline =
setPipeline.getValue();
switch (passPipeline) {
case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault:
- addCPUDefaultPassPipeline(executableLoweringPipeline, options);
+ addCPUDefaultPassPipeline(nestedModulePM);
break;
case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization:
- addCPUVectorizationPassPipeline(executableLoweringPipeline, options);
+ addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors);
break;
}
- addLowerToLLVMPasses(executableLoweringPipeline, options);
}
if (failed(runPipeline(executableLoweringPipeline, targetOp))) {
@@ -85,18 +93,15 @@
}
std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options) {
- return std::make_unique<LowerExecutableTargetPass>(options);
+createLowerExecutableTargetPass(bool lowerToVectors) {
+ return std::make_unique<LowerExecutableTargetPass>(lowerToVectors);
}
static PassRegistration<LowerExecutableTargetPass> pass(
"iree-lower-executable-target-pass",
- "Perform lowering of executable target to export dialects. Currently "
- "lowers to LLVM dialect",
- [] {
- return std::make_unique<LowerExecutableTargetPass>(
- getLLVMCodegenOptionsFromClOptions());
- });
+ "Perform lowering of executable target using one of the "
+ "IREE::HAL::DispatchLoweringPassPipeline",
+ [] { return std::make_unique<LowerExecutableTargetPass>(); });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
index 9cc021b..3264e6d 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -26,77 +26,72 @@
}
void addCPUVectorizationPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
- nestedModulePM.addPass(createCanonicalizerPass());
+ bool lowerToVectors) {
+ passManager.addPass(createCanonicalizerPass());
// TODO(ataei): This causes segmentation fault on Android. Fix it and
// re-enable.
- // nestedModulePM.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
+ // passManager.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
- // TODO(ataei): We want to enable when tensor -> vector pass is fully
- // supported which requires first moving vector-tiling before this step.
- if (options.useLinalgOnTensorsToVectors) {
- nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
- }
// Use stack allocation on CPU side.
- addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
+ addLinalgBufferizePasses(passManager, cpuAllocationFunction);
// Tile and vectorize linalg ops.
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(
- createLinalgTileAndVectorizeWorkgroupsPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(
+ createLinalgTileAndVectorizeWorkgroupsPass(lowerToVectors));
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
- nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+ passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
}
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
- nestedModulePM.addPass(createCanonicalizerPass());
+void addCPUDefaultPassPipeline(OpPassManager &passManager) {
+ passManager.addPass(createCanonicalizerPass());
// Use stack allocation on CPU side.
- addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
- nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+ addLinalgBufferizePasses(passManager, cpuAllocationFunction);
+ passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
}
-void addLowerToLLVMPasses(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+static void addLowerToLLVMPasses(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
// Linalg -> SCF
- nestedModulePM.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+ passManager.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createCSEPass());
// SCF -> STD
- nestedModulePM.addNestedPass<FuncOp>(createLowerToCFGPass());
- nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
- nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+ passManager.addNestedPass<FuncOp>(createLowerToCFGPass());
+ passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+ passManager.addNestedPass<FuncOp>(createCSEPass());
// Handled tensor-type constants.
- nestedModulePM.addPass(createTensorConstantBufferizePass());
- nestedModulePM.addPass(createFoldTensorExtractOpPass());
+ passManager.addPass(createTensorConstantBufferizePass());
+ passManager.addPass(createFoldTensorExtractOpPass());
// (HAL, IREE, Linalg, STD) -> LLVM
- nestedModulePM.addPass(createConvertToLLVMPass(options));
+ passManager.addPass(createConvertToLLVMPass());
- nestedModulePM.addPass(createCanonicalizerPass());
- nestedModulePM.addPass(createCSEPass());
+ passManager.addPass(createCanonicalizerPass());
+ passManager.addPass(createCSEPass());
}
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options) {
- passManager.addPass(createLowerExecutableTargetPass(options));
+void buildLLVMTransformPassPipeline(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
+ OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+ addLowerToLLVMPasses(nestedModulePM, options);
}
-static PassPipelineRegistration<> linalgLLVMVPipeline(
- "iree-codegen-linalg-to-llvm-pipeline",
- "Runs the progressive lowering pipeline from Linalg to LLVM",
- [](OpPassManager &passManager) {
- buildLLVMTransformPassPipeline(passManager,
- getLLVMCodegenOptionsFromClOptions());
- });
+static PassPipelineRegistration<LLVMTransformPassPipelineOptions>
+ linalgLLVMVPipeline(
+ "iree-codegen-linalg-to-llvm-pipeline",
+ "Runs the progressive lowering pipeline from Linalg to LLVM",
+ [](OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options) {
+ buildLLVMTransformPassPipeline(passManager, options);
+ });
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.h b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
index 7f626d6..252c2b0 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.h
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
@@ -7,9 +7,10 @@
#ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
#define IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassOptions.h"
namespace mlir {
namespace iree_compiler {
@@ -22,12 +23,9 @@
/// vector size.
std::unique_ptr<OperationPass<FuncOp>> createPadLinalgWorkgroupTilesPass();
-/// Distributes linalg ops among hal.interface.workgroup logical threads.
-std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLinalgTileAndDistributePass();
-
/// Vectorizes linalg ops executed in the same hal.interface.workgroup.
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+ bool lowerToVectors = true);
/// Replaces llvm.intr.fma with its unfused mul and add ops.
std::unique_ptr<FunctionPass> createUnfusedFMAOpsPass();
@@ -37,42 +35,50 @@
/// Performs the final conversion to LLVM dialect.
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
- LLVMCodegenOptions options);
+ bool unfuseFMAOps = false);
/// Pass to convert Linalg ops into vector operations.
std::unique_ptr<FunctionPass> createLinalgVectorizePass();
//===----------------------------------------------------------------------===//
-// Pass Pipelines for CPU Lowering
+// Pass Pipelines for CPU Lowering.
//===----------------------------------------------------------------------===//
/// Populates the passes to lower to scalars operations for linalg based
/// code-generation. This pipeline does not vectorize, but instead just converts
/// to memrefs
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
+void addCPUDefaultPassPipeline(OpPassManager &passManager);
/// Populates the passes needed to lower to vector operations using linalg based
/// progressive lowering with vectorization after bufferization.
void addCPUVectorizationPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
-
-/// Populates the passes needed to lower scalar/native vector code to LLVM
-/// Dialect.
-void addLowerToLLVMPasses(OpPassManager &passManager,
- LLVMCodegenOptions options);
+ bool lowerToVectors = true);
/// Pass to lower the module an hal.executable.target operation to external
/// dialect. Currently this pass lowers to LLVM dialect, but could be
/// generalized to lower to any "final" dialect like SPIR-V/NVVM, etc.
std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options);
+createLowerExecutableTargetPass(bool lowerToVectors = true);
+
+//===----------------------------------------------------------------------===//
+// Pass Pipelines for lowering to LLVM dialect.
+//===----------------------------------------------------------------------===//
+
+/// Options for LLVM pipeline.
+struct LLVMTransformPassPipelineOptions
+ : public PassPipelineOptions<LLVMTransformPassPipelineOptions> {
+ Option<bool> unfuseFMAOps{
+ *this, "unfuse-fma-ops",
+ llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+ llvm::cl::init(false)};
+};
/// Populates passes needed to lower a XLA HLO op to LLVM dialect via the
/// structured ops path. The pass manager `pm` in here should operate on the
/// module within the IREE::HAL::ExecutableOp.
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
- LLVMCodegenOptions options);
+void buildLLVMTransformPassPipeline(
+ OpPassManager &passManager,
+ const LLVMTransformPassPipelineOptions &options);
} // namespace iree_compiler
} // namespace mlir
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 60c27fd..7a74a17 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -57,7 +57,8 @@
inline void registerLinalgToLLVMPasses() {
static bool init_once = []() {
- createLowerExecutableTargetPass(LLVMCodegenOptions());
+ createLowerExecutableTargetPass();
+ createLinalgVectorizePass();
// LinalgToLLVM
createLinalgTileAndVectorizeWorkgroupsPass();
createUnfusedFMAOpsPass();
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 6c79804..ad08cb0 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -8,7 +8,6 @@
#include <cstdlib>
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
#include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
#include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
#include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h"
@@ -74,13 +73,13 @@
}
void buildTranslationPassPipeline(OpPassManager &passManager) override {
- auto codeGenOptions = getLLVMCodegenOptionsFromClOptions();
+ passManager.addPass(createLowerExecutableTargetPass());
// Set target specific options.
// TODO(ataei): This is temporary here, should move when target specific
// overrides options grows.
llvm::Triple triple(options_.targetTriple);
+ LLVMTransformPassPipelineOptions codeGenOptions;
if (triple.isWasm()) {
- // WebAssembly does not (yet) support FMA ops natively, so unfuse them.
codeGenOptions.unfuseFMAOps = true;
}
buildLLVMTransformPassPipeline(passManager, codeGenOptions);
diff --git a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
index 94829f1..9438429 100644
--- a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
@@ -27,44 +27,18 @@
namespace IREE {
namespace VMVX {
-// NOTE:
-// NOTE: THIS IS ALL JUST A HACK
-// NOTE:
-// NOTE: this entire pipeline needs to be reworked - it's been randomly
-// NOTE: constructed to "work" for a few samples by someone who does not
-// NOTE: understand the codegen system :)
-// NOTE:
-
static void buildVectorVMVXTransformPassPipeline(OpPassManager &passManager) {
+ // For now lower using the default CPU pass-pipeline which doesn't
+ // vectorize. When VMVX can lower vector operations, this can be relaxed.
+ passManager.addPass(
+ createLowerExecutableTargetPass(/*lowerToVectors=*/false));
+
OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
// ---------------------------------------------------------------------------
- // Configuration
- // ---------------------------------------------------------------------------
-
- // TODO(#5925): This can also be modified to just use the dynamic pass
- // pipeline like the CPU side.
- // passManager.addPass(createMaterializeCPULaunchConfigurationPass());
- passManager.addPass(createSetNumWorkgroupsPass());
-
- // ---------------------------------------------------------------------------
// Linalg -> Vectors
// ---------------------------------------------------------------------------
- nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
-
- // Use stack allocation for transient buffers.
- WorkgroupMemoryAllocationFn allocationFn =
- [](OpBuilder &builder, Location loc, ArrayRef<int64_t> staticShape,
- Type elementType, ArrayRef<Value> dynamicSizes) {
- MemRefType allocType = MemRefType::get(staticShape, elementType);
- return builder.create<memref::AllocaOp>(loc, allocType, dynamicSizes);
- };
- addLinalgBufferizePasses(nestedModulePM, allocationFn);
- nestedModulePM.addPass(createPromoteBuffersToStackPass(
- /*maxAllocSizeInBytes=*/1 << 10, /*bitwidthOfIndexType=*/32,
- /*maxRankOfAllocatedMemRef=*/10));
-
nestedModulePM.addNestedPass<FuncOp>(createResolveShapeOpsPass());
nestedModulePM.addNestedPass<FuncOp>(
Shape::createCleanupShapePlaceholdersPass());
diff --git a/iree/test/e2e/llvm_specific/BUILD b/iree/test/e2e/llvm_specific/BUILD
deleted file mode 100644
index e79dfcf..0000000
--- a/iree/test/e2e/llvm_specific/BUILD
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright 2019 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# Tests for end-to-end IREE support specific to the LLVM lowering.
-# TODO(ravishankarm): Reorganize these tests.
-
-load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")
-
-package(
- default_visibility = ["//visibility:public"],
- features = ["layering_check"],
- licenses = ["notice"], # Apache 2.0
-)
-
-iree_check_single_backend_test_suite(
- name = "check_llvm-aot-conv_img2col",
- srcs = [
- "conv.mlir",
- ],
- compiler_flags = [
- "-iree-input-type=mhlo",
- "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true",
- ],
- driver = "dylib",
- target_backend = "dylib-llvm-aot",
-)
diff --git a/iree/test/e2e/llvm_specific/CMakeLists.txt b/iree/test/e2e/llvm_specific/CMakeLists.txt
deleted file mode 100644
index ab49aff..0000000
--- a/iree/test/e2e/llvm_specific/CMakeLists.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
-# iree/test/e2e/llvm_specific/BUILD #
-# #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
-# CMake-only content. #
-# #
-# To disable autogeneration for this file entirely, delete this header. #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_check_single_backend_test_suite(
- NAME
- check_llvm-aot-conv_img2col
- SRCS
- "conv.mlir"
- TARGET_BACKEND
- "dylib-llvm-aot"
- DRIVER
- "dylib"
- COMPILER_FLAGS
- "-iree-input-type=mhlo"
- "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true"
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/llvm_specific/conv.mlir b/iree/test/e2e/llvm_specific/conv.mlir
deleted file mode 100644
index e969347..0000000
--- a/iree/test/e2e/llvm_specific/conv.mlir
+++ /dev/null
@@ -1,257 +0,0 @@
-func @conv2d_nopadding() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0]]]]> : tensor<1x4x4x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[1310.0],[1466.0],[1622.0]],
- [[2090.0],[2246.0],[2402.0]]
- ]]> : tensor<1x2x3x1xf32>) : tensor<1x2x3x1xf32>
- return
-}
-
-func @conv2d_1452x3221_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[[
- [[ 1.0, 2.0], [ 3.0, 4.0], [ 5.0, 6.0], [ 7.0, 8.0], [ 9.0, 10.0]],
- [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
- [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]],
- [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0], [39.0, 40.0]]]]> : tensor<1x4x5x2xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
- [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
- [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<1x4x5x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[[
- [[ 600.0], [ 736.0], [ 872.0], [1008.0], [ 476.0]],
- [[1310.0], [1466.0], [1622.0], [1778.0], [ 805.0]],
- [[2090.0], [2246.0], [2402.0], [2558.0], [1135.0]],
- [[1080.0], [1152.0], [1224.0], [1296.0], [ 524.0]]]]> : tensor<1x4x5x1xf32>) : tensor<1x4x5x1xf32>
- return
-}
-
-func @conv2d_2451x2311_same() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
- [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
- [[11.0], [12.0], [13.0], [14.0], [15.0]],
- [[16.0], [17.0], [18.0], [19.0], [20.0]]],
- [[[21.0], [22.0], [23.0], [24.0], [25.0]],
- [[26.0], [27.0], [28.0], [29.0], [30.0]],
- [[31.0], [32.0], [33.0], [34.0], [35.0]],
- [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[1.0]], [[2.0]], [[3.0]]],
- [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
- [[160.0], [226.0], [247.0], [268.0], [160.0]],
- [[240.0], [331.0], [352.0], [373.0], [220.0]],
- [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
- [[[400.0], [541.0], [562.0], [583.0], [340.0]],
- [[480.0], [646.0], [667.0], [688.0], [400.0]],
- [[560.0], [751.0], [772.0], [793.0], [460.0]],
- [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
- return
-}
-
-func @conv2d_no_padding2() attributes { iree.module.export } {
- %inputs = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0],
- [ 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0],
- [ 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0]],
- [[ 16.0, 17.0, 18.0],
- [ 19.0, 20.0, 21.0],
- [ 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0],
- [ 28.0, 29.0, 30.0]],
- [[ 31.0, 32.0, 33.0],
- [ 34.0, 35.0, 36.0],
- [ 37.0, 38.0, 39.0],
- [ 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0]],
- [[ 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0],
- [ 52.0, 53.0, 54.0],
- [ 55.0, 56.0, 57.0],
- [ 58.0, 59.0, 60.0]]],
- [[[ 61.0, 62.0, 63.0],
- [ 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0],
- [ 70.0, 71.0, 72.0],
- [ 73.0, 74.0, 75.0]],
- [[ 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0],
- [ 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0],
- [ 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0],
- [ 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0],
- [100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0]],
- [[106.0, 107.0, 108.0],
- [109.0, 110.0, 111.0],
- [112.0, 113.0, 114.0],
- [115.0, 116.0, 117.0],
- [118.0, 119.0, 120.0]]]]> : tensor<2x4x5x3xf32>
- %weights = iree.unfoldable_constant dense<[
- [[[ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
- [ 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
- [ 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]],
- [[ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0],
- [ 25.0, 26.0, 27.0, 28.0, 29.0, 30.0],
- [ 31.0, 32.0, 33.0, 34.0, 35.0, 36.0]],
- [[ 37.0, 38.0, 39.0, 40.0, 41.0, 42.0],
- [ 43.0, 44.0, 45.0, 46.0, 47.0, 48.0],
- [ 49.0, 50.0, 51.0, 52.0, 53.0, 54.0]]],
- [[[ 55.0, 56.0, 57.0, 58.0, 59.0, 60.0],
- [ 61.0, 62.0, 63.0, 64.0, 65.0, 66.0],
- [ 67.0, 68.0, 69.0, 70.0, 71.0, 72.0]],
- [[ 73.0, 74.0, 75.0, 76.0, 77.0, 78.0],
- [ 79.0, 80.0, 81.0, 82.0, 83.0, 84.0],
- [ 85.0, 86.0, 87.0, 88.0, 89.0, 90.0]],
- [[ 91.0, 92.0, 93.0, 94.0, 95.0, 96.0],
- [ 97.0, 98.0, 99.0, 100.0, 101.0, 102.0],
- [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
- feature_group_count = 1 : i64,
- rhs_dilation = dense<1> : tensor<2xi64>,
- window_strides = dense<1> : tensor<2xi64>} :
- (tensor<2x4x5x3xf32>, tensor<2x3x3x6xf32>) -> tensor<2x3x3x6xf32>
- check.expect_almost_eq_const(%res, dense<[
- [[[16065.0, 16290.0, 16515.0, 16740.0, 16965.0, 17190.0],
- [18873.0, 19152.0, 19431.0, 19710.0, 19989.0, 20268.0],
- [21681.0, 22014.0, 22347.0, 22680.0, 23013.0, 23346.0]],
- [[30105.0, 30600.0, 31095.0, 31590.0, 32085.0, 32580.0],
- [32913.0, 33462.0, 34011.0, 34560.0, 35109.0, 35658.0],
- [35721.0, 36324.0, 36927.0, 37530.0, 38133.0, 38736.0]],
- [[44145.0, 44910.0, 45675.0, 46440.0, 47205.0, 47970.0],
- [46953.0, 47772.0, 48591.0, 49410.0, 50229.0, 51048.0],
- [49761.0, 50634.0, 51507.0, 52380.0, 53253.0, 54126.0]]],
- [[[72225.0, 73530.0, 74835.0, 76140.0, 77445.0, 78750.0],
- [75033.0, 76392.0, 77751.0, 79110.0, 80469.0, 81828.0],
- [77841.0, 79254.0, 80667.0, 82080.0, 83493.0, 84906.0]],
- [[86265.0, 87840.0, 89415.0, 90990.0, 92565.0, 94140.0],
- [89073.0, 90702.0, 92331.0, 93960.0, 95589.0, 97218.0],
- [91881.0, 93564.0, 95247.0, 96930.0, 98613.0, 100296.0]],
- [[100305.0, 102150.0, 103995.0, 105840.0, 107685.0, 109530.0],
- [103113.0, 105012.0, 106911.0, 108810.0, 110709.0, 112608.0],
- [105921.0, 107874.0, 109827.0, 111780.0, 113733.0, 115686.0]]]]> : tensor<2x3x3x6xf32>) : tensor<2x3x3x6xf32>
- return
-}
-
-func @conv_1d() {
- %inputs = iree.unfoldable_constant dense<2.0> : tensor<3x8x1xf32>
- %weights = iree.unfoldable_constant dense<2.0> : tensor<3x1x1xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 2 : i64,
- input_spatial_dimensions = dense<[1]> : tensor<1xi64>,
- kernel_input_feature_dimension = 1 : i64,
- kernel_output_feature_dimension = 2 : i64,
- kernel_spatial_dimensions = dense<[0]> : tensor<1xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 2 : i64,
- output_spatial_dimensions = dense<[1]> : tensor<1xi64>
- },
- feature_group_count = 1 : i64,
- padding = dense<0> : tensor<1x2xi64>,
- rhs_dilation = dense<1> : tensor<1xi64>,
- window_strides = dense<1> : tensor<1xi64>
- } : (tensor<3x8x1xf32>, tensor<3x1x1xf32>) -> tensor<3x6x1xf32>
- check.expect_almost_eq_const(%res, dense<12.0> : tensor<3x6x1xf32>) : tensor<3x6x1xf32>
- return
-}
-
-func @conv_3d() {
- %inputs = iree.unfoldable_constant dense<1.0> : tensor<2x8x8x8x3xf32>
- %weights = iree.unfoldable_constant dense<1.0> : tensor<2x2x2x3x2xf32>
- %res = "mhlo.convolution"(%inputs, %weights) {
- batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 4 : i64,
- input_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>,
- kernel_input_feature_dimension = 3 : i64,
- kernel_output_feature_dimension = 4 : i64,
- kernel_spatial_dimensions = dense<[0, 1, 2]> : tensor<3xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 4 : i64,
- output_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>
- },
- feature_group_count = 1 : i64,
- padding = dense<0> : tensor<3x2xi64>,
- rhs_dilation = dense<1> : tensor<3xi64>,
- window_strides = dense<1> : tensor<3xi64>
- } : (tensor<2x8x8x8x3xf32>, tensor<2x2x2x3x2xf32>) -> tensor<2x7x7x7x2xf32>
- check.expect_almost_eq_const(%res, dense<24.0> : tensor<2x7x7x7x2xf32>) : tensor<2x7x7x7x2xf32>
- return
-}