Integrate llvm/llvm-project@4f15267d (#12226)
* Reset third_party/llvm-project:
4f15267d3dd797a15901fe9352f0d5fa121b9095 (2023-02-15 16:52:25 +0100):
[libc++][NFC] Replace _LIBCPP_STD_VER > x with _LIBCPP_STD_VER >= x
* Updated to tensorflow/tensorflow@75eaca4
* Updated to tensorflow/mlir-hlo@a913e03
* Cherry-picked MLIR bug fix llvm/llvm-project@3cf7f22
* Cherry-picked MLIR bug fix llvm/llvm-project@e44f405
* Used `llvm/TargetParser/Host.h` to replace `llvm/Support/Host.h`
* Used `llvm::bit_floor` to replace `llvm::PowerOf2Floor`
* Updated GPU memory space handling in converting to LLVM
* Ran `python compiler/src/iree/compiler/API2/generate_exports.py`
* Fixed bufferization issue in transform dialect path
---------
Co-authored-by: Hanhan Wang <hanchung@google.com>
Co-authored-by: Thomas Raoux <thomasraoux@google.com>
Co-authored-by: Matthias Springer <springerm@google.com>
diff --git a/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp b/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
index f5dd5a4..ec63010 100644
--- a/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
+++ b/compiler/src/iree/compiler/API2/Internal/LLDToolEntryPoint.cpp
@@ -22,11 +22,11 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/CrashRecoveryContext.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/Process.h"
+#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/Triple.h"
using namespace lld;
diff --git a/compiler/src/iree/compiler/API2/api_exports.c b/compiler/src/iree/compiler/API2/api_exports.c
index 7fd6604..c82f363 100644
--- a/compiler/src/iree/compiler/API2/api_exports.c
+++ b/compiler/src/iree/compiler/API2/api_exports.c
@@ -302,6 +302,8 @@
extern void mlirFlatSymbolRefAttrGet();
extern void mlirFlatSymbolRefAttrGetValue();
extern void mlirFloat8E4M3FNTypeGet();
+extern void mlirFloat8E4M3FNUZTypeGet();
+extern void mlirFloat8E5M2FNUZTypeGet();
extern void mlirFloat8E5M2TypeGet();
extern void mlirFloatAttrDoubleGet();
extern void mlirFloatAttrDoubleGetChecked();
@@ -533,7 +535,9 @@
extern void mlirTypeIsAF32();
extern void mlirTypeIsAF64();
extern void mlirTypeIsAFloat8E4M3FN();
+extern void mlirTypeIsAFloat8E4M3FNUZ();
extern void mlirTypeIsAFloat8E5M2();
+extern void mlirTypeIsAFloat8E5M2FNUZ();
extern void mlirTypeIsAFunction();
extern void mlirTypeIsAIndex();
extern void mlirTypeIsAInteger();
@@ -880,6 +884,8 @@
x += (uintptr_t)&mlirFlatSymbolRefAttrGet;
x += (uintptr_t)&mlirFlatSymbolRefAttrGetValue;
x += (uintptr_t)&mlirFloat8E4M3FNTypeGet;
+ x += (uintptr_t)&mlirFloat8E4M3FNUZTypeGet;
+ x += (uintptr_t)&mlirFloat8E5M2FNUZTypeGet;
x += (uintptr_t)&mlirFloat8E5M2TypeGet;
x += (uintptr_t)&mlirFloatAttrDoubleGet;
x += (uintptr_t)&mlirFloatAttrDoubleGetChecked;
@@ -1111,7 +1117,9 @@
x += (uintptr_t)&mlirTypeIsAF32;
x += (uintptr_t)&mlirTypeIsAF64;
x += (uintptr_t)&mlirTypeIsAFloat8E4M3FN;
+ x += (uintptr_t)&mlirTypeIsAFloat8E4M3FNUZ;
x += (uintptr_t)&mlirTypeIsAFloat8E5M2;
+ x += (uintptr_t)&mlirTypeIsAFloat8E5M2FNUZ;
x += (uintptr_t)&mlirTypeIsAFunction;
x += (uintptr_t)&mlirTypeIsAIndex;
x += (uintptr_t)&mlirTypeIsAInteger;
diff --git a/compiler/src/iree/compiler/API2/api_exports.def b/compiler/src/iree/compiler/API2/api_exports.def
index e3316c9..6e91e69 100644
--- a/compiler/src/iree/compiler/API2/api_exports.def
+++ b/compiler/src/iree/compiler/API2/api_exports.def
@@ -294,6 +294,8 @@
mlirFlatSymbolRefAttrGet
mlirFlatSymbolRefAttrGetValue
mlirFloat8E4M3FNTypeGet
+ mlirFloat8E4M3FNUZTypeGet
+ mlirFloat8E5M2FNUZTypeGet
mlirFloat8E5M2TypeGet
mlirFloatAttrDoubleGet
mlirFloatAttrDoubleGetChecked
@@ -525,7 +527,9 @@
mlirTypeIsAF32
mlirTypeIsAF64
mlirTypeIsAFloat8E4M3FN
+ mlirTypeIsAFloat8E4M3FNUZ
mlirTypeIsAFloat8E5M2
+ mlirTypeIsAFloat8E5M2FNUZ
mlirTypeIsAFunction
mlirTypeIsAIndex
mlirTypeIsAInteger
diff --git a/compiler/src/iree/compiler/API2/api_exports.ld b/compiler/src/iree/compiler/API2/api_exports.ld
index 435d025..55522d7 100644
--- a/compiler/src/iree/compiler/API2/api_exports.ld
+++ b/compiler/src/iree/compiler/API2/api_exports.ld
@@ -295,6 +295,8 @@
mlirFlatSymbolRefAttrGet;
mlirFlatSymbolRefAttrGetValue;
mlirFloat8E4M3FNTypeGet;
+ mlirFloat8E4M3FNUZTypeGet;
+ mlirFloat8E5M2FNUZTypeGet;
mlirFloat8E5M2TypeGet;
mlirFloatAttrDoubleGet;
mlirFloatAttrDoubleGetChecked;
@@ -526,7 +528,9 @@
mlirTypeIsAF32;
mlirTypeIsAF64;
mlirTypeIsAFloat8E4M3FN;
+ mlirTypeIsAFloat8E4M3FNUZ;
mlirTypeIsAFloat8E5M2;
+ mlirTypeIsAFloat8E5M2FNUZ;
mlirTypeIsAFunction;
mlirTypeIsAIndex;
mlirTypeIsAInteger;
diff --git a/compiler/src/iree/compiler/API2/api_exports.macos.lst b/compiler/src/iree/compiler/API2/api_exports.macos.lst
index b9786ad..40d66a0 100644
--- a/compiler/src/iree/compiler/API2/api_exports.macos.lst
+++ b/compiler/src/iree/compiler/API2/api_exports.macos.lst
@@ -293,6 +293,8 @@
_mlirFlatSymbolRefAttrGet
_mlirFlatSymbolRefAttrGetValue
_mlirFloat8E4M3FNTypeGet
+_mlirFloat8E4M3FNUZTypeGet
+_mlirFloat8E5M2FNUZTypeGet
_mlirFloat8E5M2TypeGet
_mlirFloatAttrDoubleGet
_mlirFloatAttrDoubleGetChecked
@@ -524,7 +526,9 @@
_mlirTypeIsAF32
_mlirTypeIsAF64
_mlirTypeIsAFloat8E4M3FN
+_mlirTypeIsAFloat8E4M3FNUZ
_mlirTypeIsAFloat8E5M2
+_mlirTypeIsAFloat8E5M2FNUZ
_mlirTypeIsAFunction
_mlirTypeIsAIndex
_mlirTypeIsAInteger
diff --git a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
index 56429f8..705c1aa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
+++ b/compiler/src/iree/compiler/Codegen/Common/FoldTensorExtractOp.td
@@ -14,7 +14,9 @@
// Canonicalize unnecessary tensor_load when the load is used just for
// an extract
-def : Pat<(Tensor_ExtractOp (Bufferization_ToTensorOp $value), $indices),
+def : Pat<(Tensor_ExtractOp(Bufferization_ToTensorOp $value, $restrict,
+ $writable),
+ $indices),
(LoadOp $value, $indices, ConstBoolAttrFalse)>;
#endif // IREE_COMPILER_CODEGEN_COMMON_FOLDTENSOREXTRACTOP
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
index b281ed8..ca7a346 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformExtensions/CommonExtensions.cpp
@@ -1054,12 +1054,7 @@
options.memCpyFn = memCpyFn;
options.testAnalysisOnly = getTestAnalysisOnly();
options.printConflicts = getPrintConflicts();
- WalkResult res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
- if (failed(runIREEOneShotBufferize(moduleOp, options)))
- return WalkResult::interrupt();
- return WalkResult::advance();
- });
- if (res.wasInterrupted())
+ if (failed(runIREEOneShotBufferize(state.getTopLevel(), options)))
return DiagnosedSilenceableFailure::definiteFailure();
// Early exit if test_analysis_only is set.
@@ -1071,7 +1066,7 @@
// 3. Post-bufferization passes are fine.
PassManager pm(getContext());
addIREEPostBufferizationPasses(pm);
- res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
+ WalkResult res = state.getTopLevel()->walk([&](ModuleOp moduleOp) {
if (failed(pm.run(moduleOp))) {
getOperation()->emitError()
<< "failed to post-bufferization passes on module:\n"
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 4fff146..2925394 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -767,9 +767,7 @@
// Cast to the desired memref element type.
auto elementType = typeConverter->convertType(memRefType.getElementType());
Value typedPtrValue = builder.create<LLVM::BitcastOp>(
- loc,
- LLVM::LLVMPointerType::get(elementType, memRefType.getMemorySpaceAsInt()),
- basePtrValue);
+ loc, LLVM::LLVMPointerType::get(elementType), basePtrValue);
// Construct the MemRefDescriptor type based on the information we have.
// NOTE: we could use the binding length to clamp this/check that the
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 8308849..901f93f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -374,8 +374,8 @@
}
// Fallback to power of 2 if there's no hint or can't find the ideal size.
if (vectorSize <= 1 || candidateTileSize == 1) {
- candidateTileSize =
- std::max<int64_t>(llvm::PowerOf2Floor(targetSize), minTileSizes[i]);
+ candidateTileSize = std::max<int64_t>(
+ llvm::bit_floor<uint64_t>(targetSize), minTileSizes[i]);
}
// Limit the workload per workgroup to the default being the max to keep the
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
index e397b48..1acc08e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -154,7 +154,12 @@
LogicalResult matchAndRewrite(memref::AllocOp allocOp,
PatternRewriter &rewriter) const override {
- if (allocOp.getType().getMemorySpaceAsInt() != 3) return failure();
+ auto addressSpace = allocOp.getType()
+ .getMemorySpace()
+ .dyn_cast_or_null<gpu::AddressSpaceAttr>();
+ if (!addressSpace ||
+ addressSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
+ return failure();
ArrayRef<int64_t> shape = allocOp.getType().getShape();
if (llvm::any_of(shape,
[](int64_t dim) { return dim == ShapedType::kDynamic; })) {
@@ -266,8 +271,7 @@
funcOp.walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
auto memrefType = subspanOp.getType().cast<MemRefType>();
Type elType = memrefType.getElementType();
- auto llvmType =
- LLVM::LLVMPointerType::get(elType, memrefType.getMemorySpaceAsInt());
+ auto llvmType = LLVM::LLVMPointerType::get(elType);
llvmInputTypes[argMapping[SetBinding(subspanOp.getSet(),
subspanOp.getBinding())]] = llvmType;
});
@@ -388,8 +392,7 @@
loc, llvmBufferBasei8Ptr.getType(), llvmBufferBasei8Ptr,
adaptor.getByteOffset());
}
- auto llvmPtrType = LLVM::LLVMPointerType::get(
- memrefType.getElementType(), memrefType.getMemorySpaceAsInt());
+ auto llvmPtrType = LLVM::LLVMPointerType::get(memrefType.getElementType());
Value llvmBufferBasePtr =
rewriter.create<LLVM::BitcastOp>(loc, llvmPtrType, llvmBufferBasei8Ptr);
if (memrefType.hasStaticShape()) {
@@ -519,5 +522,20 @@
return std::make_unique<TestLLVMGPULegalizeOpPass>();
}
+static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
+ return IntegerAttr::get(IntegerType::get(ctx, 64), space);
+}
+
+void populateGpuMemorySpaceAttributeConversions(
+ TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
+ typeConverter.addTypeAttributeConversion(
+ [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
+ gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
+ unsigned addressSpace = mapping(memorySpace);
+ return wrapNumericMemorySpace(memorySpaceAttr.getContext(),
+ addressSpace);
+ });
+}
+
} // namespace iree_compiler
} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
index e10f93a..e683869 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h
@@ -10,6 +10,9 @@
#include "mlir/Conversion/LLVMCommon/Pattern.h"
namespace mlir {
+namespace gpu {
+enum class AddressSpace : uint32_t;
+}
namespace iree_compiler {
/// Verifies compatibility of the module for application of the LLVM
@@ -33,6 +36,11 @@
void ConvertToDynamicSharedMemory(ModuleOp moduleOp);
+using MemorySpaceMapping =
+ std::function<unsigned(gpu::AddressSpace gpuAddressSpace)>;
+void populateGpuMemorySpaceAttributeConversions(
+ TypeConverter &typeConverter, const MemorySpaceMapping &mapping);
+
} // namespace iree_compiler
} // namespace mlir
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
index 9095dc6..0d4959a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
@@ -42,13 +42,16 @@
LogicalResult matchAndRewrite(memref::DeallocOp op,
PatternRewriter &rewriter) const override {
- unsigned addressSpace =
- op.getMemref().getType().cast<MemRefType>().getMemorySpaceAsInt();
- if (addressSpace == NVVM::NVVMMemorySpace::kSharedMemorySpace) {
- rewriter.eraseOp(op);
- return success();
- }
- return failure();
+ auto addressSpace = op.getMemref()
+ .getType()
+ .cast<MemRefType>()
+ .getMemorySpace()
+ .dyn_cast_or_null<gpu::AddressSpaceAttr>();
+ if (!addressSpace ||
+ addressSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
+ return failure();
+ rewriter.eraseOp(op);
+ return success();
}
};
@@ -72,6 +75,21 @@
LowerToLLVMOptions options(m.getContext(), DataLayout(m));
options.overrideIndexBitwidth(64);
LLVMTypeConverter converter(m.getContext(), options);
+ populateGpuMemorySpaceAttributeConversions(
+ converter, [](gpu::AddressSpace space) -> unsigned {
+ switch (space) {
+ case gpu::AddressSpace::Global:
+ return static_cast<unsigned>(
+ NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+ case gpu::AddressSpace::Workgroup:
+ return static_cast<unsigned>(
+ NVVM::NVVMMemorySpace::kSharedMemorySpace);
+ case gpu::AddressSpace::Private:
+ return 0;
+ }
+ llvm_unreachable("unknown address space enum value");
+ return 0;
+ });
// Lowering for MMAMatrixType.
converter.addConversion([&](gpu::MMAMatrixType type) -> Type {
return convertMMAToLLVMType(type);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
index 1e393a4..c9c5ce6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
@@ -48,6 +48,19 @@
LowerToLLVMOptions options(m.getContext(), DataLayout(m));
options.overrideIndexBitwidth(64);
LLVMTypeConverter converter(m.getContext(), options);
+ populateGpuMemorySpaceAttributeConversions(
+ converter, [](gpu::AddressSpace space) {
+ switch (space) {
+ case gpu::AddressSpace::Global:
+ return 1;
+ case gpu::AddressSpace::Workgroup:
+ return 3;
+ case gpu::AddressSpace::Private:
+ return 5;
+ }
+ llvm_unreachable("unknown address space enum value");
+ return 0;
+ });
// Apply in-dialect lowering first. In-dialect lowering will replace ops
// which need to be lowered further, which is not supported by a single
// conversion pass.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
index a0e1d38..39fe02e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp
@@ -68,8 +68,9 @@
return signalPassFailure();
}
+ IRRewriter rewriter(&getContext());
if (llvmgpuUseMMASync) {
- if (failed(convertVectorToNVVMCompatibleMMASync(funcOp))) {
+ if (failed(convertVectorToNVVMCompatibleMMASync(rewriter, funcOp))) {
return signalPassFailure();
}
// Using TF32 for Float.
@@ -81,7 +82,9 @@
return signalPassFailure();
}
} else {
- convertVectorToMMAOps(funcOp);
+ if (failed(convertVectorToMMAOps(rewriter, funcOp))) {
+ return signalPassFailure();
+ }
}
createAsyncGroups(funcOp, llvmgpuUseMMASync);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index 7e51dc2..b13a5b9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
@@ -410,7 +410,6 @@
pm.addNestedPass<func::FuncOp>(memref::createExpandOpsPass());
pm.addPass(memref::createExpandStridedMetadataPass());
pm.addPass(createLowerAffinePass());
- pm.addPass(createGPULowerMemorySpaceAttributesPass());
// Strip out the debug info for the kernel as CUDA driver doesn't diggest PTX
// debug info well.
pm.addPass(createStripDebugInfoPass());
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
index e276361..5aa0b5a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
@@ -655,10 +655,14 @@
mlir::vector::populateCastAwayVectorLeadingOneDimPatterns(patterns);
populatePrepareVectorToMMAPatterns(patterns, /*llvmgpuUseMMASync=*/false);
if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) {
+ target->emitOpError("vector to mma preparation patterns failed to apply");
+ return emitDefaultDefiniteFailure(target);
+ }
+ IRRewriter rewriter(getContext());
+ if (failed(convertVectorToMMAOps(rewriter, target))) {
target->emitOpError("vector to mma patterns failed to apply");
return emitDefaultDefiniteFailure(target);
}
- convertVectorToMMAOps(target);
results.push_back(target);
return DiagnosedSilenceableFailure::success();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
index c9dd5a5..93c7bad 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
@@ -201,12 +201,12 @@
func.func @shared_memory_lowering() {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0.000000e+00> : vector<4xf32>
- %0 = memref.alloc() : memref<1x16x32xf32, 3>
- %1 = memref.alloc() : memref<1x32x16xf32, 3>
- %2 = memref.alloc() : memref<1x8x16xf32, 3>
- vector.store %cst, %1[%c0, %c0, %c0] : memref<1x32x16xf32, 3>, vector<4xf32>
- vector.store %cst, %2[%c0, %c0, %c0] : memref<1x8x16xf32, 3>, vector<4xf32>
- vector.store %cst, %0[%c0, %c0, %c0] : memref<1x16x32xf32, 3>, vector<4xf32>
+ %0 = memref.alloc() : memref<1x16x32xf32, #gpu.address_space<workgroup>>
+ %1 = memref.alloc() : memref<1x32x16xf32, #gpu.address_space<workgroup>>
+ %2 = memref.alloc() : memref<1x8x16xf32, #gpu.address_space<workgroup>>
+ vector.store %cst, %1[%c0, %c0, %c0] : memref<1x32x16xf32, #gpu.address_space<workgroup>>, vector<4xf32>
+ vector.store %cst, %2[%c0, %c0, %c0] : memref<1x8x16xf32, #gpu.address_space<workgroup>>, vector<4xf32>
+ vector.store %cst, %0[%c0, %c0, %c0] : memref<1x16x32xf32, #gpu.address_space<workgroup>>, vector<4xf32>
return
}
}
@@ -246,10 +246,10 @@
%f0 = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
// CHECK: llvm.mlir.addressof @__dynamic_shared_memory__ : !llvm.ptr<array<0 x i8>, 3>
- %0 = memref.alloc() : memref<1xf32, 3>
- memref.store %f0, %0[%c0] : memref<1xf32, 3>
+ %0 = memref.alloc() : memref<1xf32, #gpu.address_space<workgroup>>
+ memref.store %f0, %0[%c0] : memref<1xf32, #gpu.address_space<workgroup>>
// CHECK-NOT: free
- memref.dealloc %0 : memref<1xf32, 3>
+ memref.dealloc %0 : memref<1xf32, #gpu.address_space<workgroup>>
return
}
}
@@ -271,10 +271,10 @@
%c0 = arith.constant 0 : index
%cst_f32 = arith.constant 0.000000e+00 : f32
%cst_i8 = arith.constant 0 : i8
- %0 = memref.alloc() : memref<1xi8, 3>
- %1 = memref.alloc() : memref<32xf32, 3>
- memref.store %cst_i8, %0[%c0] : memref<1xi8, 3>
- memref.store %cst_f32, %1[%c0] : memref<32xf32, 3>
+ %0 = memref.alloc() : memref<1xi8, #gpu.address_space<workgroup>>
+ %1 = memref.alloc() : memref<32xf32, #gpu.address_space<workgroup>>
+ memref.store %cst_i8, %0[%c0] : memref<1xi8, #gpu.address_space<workgroup>>
+ memref.store %cst_f32, %1[%c0] : memref<32xf32, #gpu.address_space<workgroup>>
return
}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
index d1b5b6c..a06a2c5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/legalize.mlir
@@ -37,13 +37,13 @@
// -----
-// CHECK: memref.global "private" @__shared_memory__ : memref<16x16xf32, 3>
+// CHECK: memref.global "private" @__shared_memory__ : memref<16x16xf32, #gpu.address_space<workgroup>>
// CHECK: func.func @allocation
-// CHECK: %[[A:.*]] = memref.get_global @__shared_memory__ : memref<16x16xf32, 3>
-// CHECK: memref.store %{{.*}}, %[[A]][%{{.*}}, %{{.*}}] : memref<16x16xf32, 3>
+// CHECK: %[[A:.*]] = memref.get_global @__shared_memory__ : memref<16x16xf32, #gpu.address_space<workgroup>>
+// CHECK: memref.store %{{.*}}, %[[A]][%{{.*}}, %{{.*}}] : memref<16x16xf32, #gpu.address_space<workgroup>>
func.func @allocation(%arg0: f32) {
- %0 = memref.alloc() : memref<16x16xf32, 3>
+ %0 = memref.alloc() : memref<16x16xf32, #gpu.address_space<workgroup>>
%c0 = arith.constant 0 : index
- memref.store %arg0, %0[%c0, %c0] : memref<16x16xf32, 3>
+ memref.store %arg0, %0[%c0, %c0] : memref<16x16xf32, #gpu.address_space<workgroup>>
return
}
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
index e790f90..2a59748 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
@@ -386,7 +386,8 @@
int64_t &tileSize) {
// Deduce the configuration for the K dimension. We need some power of two
// here so that we can do vector load.
- for (int64_t t = llvm::PowerOf2Floor(residualTilingFactor); t >= 2; t >>= 1) {
+ for (int64_t t = llvm::bit_floor<uint64_t>(residualTilingFactor); t >= 2;
+ t >>= 1) {
if (dimK % t == 0) {
tileSize = t;
return true;
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
index dce9cf4..0d4c86e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorToGPUSubgroupMMAOps.cpp
@@ -40,7 +40,11 @@
return signalPassFailure();
}
- convertVectorToMMAOps(funcOp);
+ IRRewriter rewriter(&getContext());
+ if (failed(convertVectorToMMAOps(rewriter, funcOp))) {
+ funcOp->emitError("failed conversion to GPU subgroup MMA ops");
+ return signalPassFailure();
+ }
// Make sure we actually generate GPU subgroup mma ops.
WalkResult result = funcOp.walk([](Operation* op) {
@@ -48,7 +52,7 @@
: WalkResult::advance();
});
if (!result.wasInterrupted()) {
- funcOp->emitError("failed conversion to GPU subgroup MMA ops");
+ funcOp->emitError("no GPU subgroup mma compute ops generated");
return signalPassFailure();
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
index ebf3702..6d7ce09 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
@@ -91,6 +91,7 @@
"@llvm-project//llvm:Passes",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:Target",
+ "@llvm-project//llvm:TargetParser",
"@llvm-project//mlir:Support",
],
)
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
index 9d4b5d6..93959e5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
@@ -73,6 +73,7 @@
LLVMPasses
LLVMSupport
LLVMTarget
+ LLVMTargetParser
MLIRSupport
PUBLIC
)
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
index 42a3049..01425cf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMCPUTarget.cpp
@@ -414,7 +414,7 @@
func.setDSOLocal(true);
func.setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
}
- for (auto &global : llvmModule->getGlobalList()) {
+ for (auto &global : llvmModule->globals()) {
global.setDSOLocal(true);
global.setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
index 1f249fa..1f86f76 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
@@ -17,8 +17,8 @@
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
index f2d8a5e..71c1113 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
@@ -12,8 +12,8 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
index 98e4b0f..216d95b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/internal/AndroidLinkerTool.cpp
@@ -8,7 +8,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/Triple.h"
#define DEBUG_TYPE "llvm-linker"
diff --git a/integrations/tensorflow/WORKSPACE b/integrations/tensorflow/WORKSPACE
index e94d707..7232e2a 100644
--- a/integrations/tensorflow/WORKSPACE
+++ b/integrations/tensorflow/WORKSPACE
@@ -7,7 +7,7 @@
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
-TENSORFLOW_COMMIT = "d1c09628895840a713e92e8557b5fd90afa94c18"
+TENSORFLOW_COMMIT = "75eaca49ed62c37278113b270a3e19edab0ba26d"
git_repository(
name = "org_tensorflow",
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index 5d75f46..4fd5b1c 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -981,16 +981,16 @@
pm.addPass(createLowerAffinePass());
pm.addPass(createConvertSCFToCFPass());
pm.addPass(createConvertLinalgToLLVMPass());
- pm.addPass(createConvertVectorToLLVMPass(
- // clang-format off
- LowerVectorToLLVMOptions()
- .enableReassociateFPReductions(getReassociateFpReductions())
- .enableIndexOptimizations(getEnableIndexOptimizations())
- .enableArmNeon(getEnableArmNeon())
- .enableArmSVE(getEnableArmSve())
- .enableAMX(getEnableAmx())
- .enableX86Vector(getEnableX86vector())));
- // clang-format on
+ {
+ auto options = ConvertVectorToLLVMPassOptions();
+ options.reassociateFPReductions = getReassociateFpReductions();
+ options.force32BitVectorIndices = getEnableIndexOptimizations();
+ options.armNeon = getEnableArmNeon();
+ options.armSVE = getEnableArmSve();
+ options.amx = getEnableAmx();
+ options.x86Vector = getEnableX86vector();
+ pm.addPass(createConvertVectorToLLVMPass(options));
+ }
pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
pm.addPass(createFinalizeMemRefToLLVMConversionPass());
if (getEnableAsync())
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 947c8b4..66616cb 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 947c8b4b5869f18b1807863138271b3149ffc245
+Subproject commit 66616cb9136ed360b0b8b3545fbd64050b32c8f6
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index 63ac2f8..a913e03 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit 63ac2f8f3989266b65fc3150bd25648f75e34a62
+Subproject commit a913e03964df57009a51b46ccba09b322f2ba31b