Removing descriptor set layouts from HAL IR and simplifying bindings.
* Renamed `push_constants` to `constants` (as there is no longer a
`push_constants` API)
* Dropped `#hal.descriptor_set.layout`
* Removed the ordinal from `#hal.descriptor_set.binding` (ordinals are
  now implicit)
* Renamed `#hal.descriptor_set.binding` to `#hal.pipeline.binding`
* Removed `set` from `hal.interface.binding.subspan`
* Removed `#hal.interface.binding` and the spooky action-at-a-distance
  `hal.interface.binding` attr, now that ordinals are implicit
Progress on #18154.
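
For reference, a minimal before/after sketch of the layout attribute and
subspan syntax (the constant count, binding count, and tensor type here are
illustrative; the updated tests below show real usage):

  // Before:
  #pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
    #hal.descriptor_set.layout<0, bindings = [
      #hal.descriptor_set.binding<0, storage_buffer>,
      #hal.descriptor_set.binding<1, storage_buffer>
    ]>
  ]>
  %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
      offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4xf32>>

  // After (ordinals are implied by position in the bindings list):
  #pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
    #hal.pipeline.binding<storage_buffer>,
    #hal.pipeline.binding<storage_buffer>
  ]>
  %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
      offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
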
diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp
index 2268890..18896f2 100644
--- a/compiler/plugins/target/CUDA/CUDATarget.cpp
+++ b/compiler/plugins/target/CUDA/CUDATarget.cpp
@@ -672,10 +672,9 @@
}
auto layoutAttr = exportOp.getLayoutAttr();
- uint32_t constantCount =
- static_cast<uint32_t>(layoutAttr.getPushConstants());
+ uint32_t constantCount = static_cast<uint32_t>(layoutAttr.getConstants());
SmallVector<iree_hal_cuda_BindingBits_enum_t> bindingFlags;
- for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) {
+ for (auto bindingAttr : layoutAttr.getBindings()) {
iree_hal_cuda_BindingBits_enum_t flags = 0;
if (allEnumBitsSet(bindingAttr.getFlags(),
IREE::HAL::DescriptorFlags::ReadOnly)) {
diff --git a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp
index ce342b5..7db50ac 100644
--- a/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp
+++ b/compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp
@@ -399,9 +399,8 @@
// Specify the constant and binding information used to validate
// dispatches.
if (auto layoutAttr = exportOp.getLayout()) {
- dispatchAttrs.constantCount = layoutAttr.getPushConstants();
- dispatchAttrs.bindingCount =
- layoutAttr.getSetLayout(0).getBindings().size();
+ dispatchAttrs.constantCount = layoutAttr.getConstants();
+ dispatchAttrs.bindingCount = layoutAttr.getBindings().size();
}
LibraryBuilder::SourceLocation sourceLocation;
diff --git a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp
index 6bffd17..eb8918d 100644
--- a/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp
+++ b/compiler/plugins/target/MetalSPIRV/MetalSPIRVTarget.cpp
@@ -277,10 +277,9 @@
};
auto layoutAttr = exportOp.getLayoutAttr();
- uint32_t constantCount =
- static_cast<uint32_t>(layoutAttr.getPushConstants());
+ uint32_t constantCount = static_cast<uint32_t>(layoutAttr.getConstants());
SmallVector<iree_hal_metal_BindingBits_enum_t> bindingFlags;
- for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) {
+ for (auto bindingAttr : layoutAttr.getBindings()) {
iree_hal_metal_BindingBits_enum_t flags = 0;
if (allEnumBitsSet(bindingAttr.getFlags(),
IREE::HAL::DescriptorFlags::ReadOnly)) {
diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index 476d4ce..04e87bb 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp
@@ -655,10 +655,9 @@
}
auto layoutAttr = exportOp.getLayoutAttr();
- uint32_t constantCount =
- static_cast<uint32_t>(layoutAttr.getPushConstants());
+ uint32_t constantCount = static_cast<uint32_t>(layoutAttr.getConstants());
SmallVector<iree_hal_hip_BindingBits_enum_t> bindingFlags;
- for (auto bindingAttr : layoutAttr.getSetLayout(0).getBindings()) {
+ for (auto bindingAttr : layoutAttr.getBindings()) {
iree_hal_hip_BindingBits_enum_t flags = 0;
if (allEnumBitsSet(bindingAttr.getFlags(),
IREE::HAL::DescriptorFlags::ReadOnly)) {
diff --git a/compiler/plugins/target/VMVX/VMVXTarget.cpp b/compiler/plugins/target/VMVX/VMVXTarget.cpp
index 4ac7011..daba862 100644
--- a/compiler/plugins/target/VMVX/VMVXTarget.cpp
+++ b/compiler/plugins/target/VMVX/VMVXTarget.cpp
@@ -133,13 +133,13 @@
// Specify the constant and binding information used to validate
// dispatches.
if (auto layoutAttr = exportOp.getLayout()) {
- int64_t constantCount = layoutAttr.getPushConstants();
+ int64_t constantCount = layoutAttr.getConstants();
if (constantCount > 0) {
funcOp.setReflectionAttr("constant_count",
executableBuilder.getI8IntegerAttr(
static_cast<uint8_t>(constantCount)));
}
- size_t bindingCount = layoutAttr.getSetLayout(0).getBindings().size();
+ size_t bindingCount = layoutAttr.getBindings().size();
if (bindingCount > 0) {
funcOp.setReflectionAttr("binding_count",
executableBuilder.getI8IntegerAttr(
diff --git a/compiler/plugins/target/VMVX/test/smoketest.mlir b/compiler/plugins/target/VMVX/test/smoketest.mlir
index 44b3208..6bc3062 100644
--- a/compiler/plugins/target/VMVX/test/smoketest.mlir
+++ b/compiler/plugins/target/VMVX/test/smoketest.mlir
@@ -38,11 +38,10 @@
// CHECK-LABEL: hal.executable public @add_dispatch_0
// CHECK-NEXT: hal.executable.variant public @vmvx_bytecode_fb target(<"vmvx", "vmvx-bytecode-fb">) {
// CHECK-NEXT: hal.executable.export public @add_dispatch_0 ordinal(0)
-// CHECK-SAME: layout(#hal.pipeline.layout<push_constants = 0, sets = [
-// CHECK-SAME: <0, bindings = [
-// CHECK-SAME: <0, storage_buffer>,
-// CHECK-SAME: <1, storage_buffer>,
-// CHECK-SAME: <2, storage_buffer>
+// CHECK-SAME: layout(#hal.pipeline.layout<bindings = [
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer>,
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer>,
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer>
// CHECK: module attributes {vm.toplevel} {
// CHECK-NEXT: vm.module public @module {
// CHECK-NEXT: vm.func private @add_dispatch_0(
diff --git a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp
index 22ae954..58137fd 100644
--- a/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp
+++ b/compiler/plugins/target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -61,12 +61,14 @@
};
} // namespace
+using DescriptorSetLayout = std::pair<unsigned, ArrayRef<PipelineBindingAttr>>;
+
static std::tuple<iree_hal_vulkan_DescriptorSetLayoutDef_vec_ref_t,
iree_hal_vulkan_PipelineLayoutDef_vec_ref_t,
DenseMap<IREE::HAL::PipelineLayoutAttr, uint32_t>>
createPipelineLayoutDefs(ArrayRef<IREE::HAL::ExecutableExportOp> exportOps,
FlatbufferBuilder &fbb) {
- DenseMap<IREE::HAL::DescriptorSetLayoutAttr, size_t> descriptorSetLayoutMap;
+ DenseMap<DescriptorSetLayout, size_t> descriptorSetLayoutMap;
DenseMap<IREE::HAL::PipelineLayoutAttr, uint32_t> pipelineLayoutMap;
SmallVector<iree_hal_vulkan_DescriptorSetLayoutDef_ref_t>
descriptorSetLayoutRefs;
@@ -77,18 +79,20 @@
continue; // already present
}
+ // Currently there is only one descriptor set on the compiler side. We
+ // could partition it by binding type (direct vs indirect, etc.).
SmallVector<uint32_t> descriptorSetLayoutOrdinals;
- for (auto descriptorSetLayoutAttr : pipelineLayoutAttr.getSetLayouts()) {
- auto it = descriptorSetLayoutMap.find(descriptorSetLayoutAttr);
- if (it != descriptorSetLayoutMap.end()) {
- descriptorSetLayoutOrdinals.push_back(it->second);
- continue;
- }
-
+ auto descriptorSetLayout =
+ DescriptorSetLayout(0, pipelineLayoutAttr.getBindings());
+ auto it = descriptorSetLayoutMap.find(descriptorSetLayout);
+ if (it != descriptorSetLayoutMap.end()) {
+ descriptorSetLayoutOrdinals.push_back(it->second);
+ } else {
SmallVector<iree_hal_vulkan_DescriptorSetLayoutBindingDef_ref_t>
bindingRefs;
- for (auto bindingAttr : descriptorSetLayoutAttr.getBindings()) {
- uint32_t ordinal = static_cast<uint32_t>(bindingAttr.getOrdinal());
+ for (auto [i, bindingAttr] :
+ llvm::enumerate(pipelineLayoutAttr.getBindings())) {
+ uint32_t ordinal = static_cast<uint32_t>(i);
iree_hal_vulkan_VkDescriptorType_enum_t descriptorType = 0;
switch (bindingAttr.getType()) {
case IREE::HAL::DescriptorType::UniformBuffer:
@@ -107,7 +111,7 @@
auto bindingsRef = fbb.createOffsetVecDestructive(bindingRefs);
descriptorSetLayoutOrdinals.push_back(descriptorSetLayoutRefs.size());
- descriptorSetLayoutMap[descriptorSetLayoutAttr] =
+ descriptorSetLayoutMap[descriptorSetLayout] =
descriptorSetLayoutRefs.size();
descriptorSetLayoutRefs.push_back(
iree_hal_vulkan_DescriptorSetLayoutDef_create(fbb, bindingsRef));
@@ -116,7 +120,7 @@
fbb.createInt32Vec(descriptorSetLayoutOrdinals);
iree_hal_vulkan_PushConstantRange_vec_ref_t pushConstantRangesRef = 0;
- if (int64_t pushConstantCount = pipelineLayoutAttr.getPushConstants()) {
+ if (int64_t pushConstantCount = pipelineLayoutAttr.getConstants()) {
SmallVector<iree_hal_vulkan_PushConstantRange> pushConstantRanges;
iree_hal_vulkan_PushConstantRange range0;
range0.stage_flags = 0x00000020u; // VK_SHADER_STAGE_COMPUTE_BIT
diff --git a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
index 54094f8..089f6a2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/BufferizationAnalysis.cpp
@@ -137,8 +137,7 @@
if (!v1InterfaceBinding || !v2InterfaceBinding) {
return true;
}
- if (v1InterfaceBinding.getSet() != v2InterfaceBinding.getSet() ||
- v1InterfaceBinding.getBinding() != v2InterfaceBinding.getBinding() ||
+ if (v1InterfaceBinding.getBinding() != v2InterfaceBinding.getBinding() ||
v1InterfaceBinding.getByteOffset() !=
v2InterfaceBinding.getByteOffset()) {
// If the set, binding or offsets are different, map these to different
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir
index 6dd1983..4491ab1 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [bf16, bf16, bf16], original_type = tensor<1x1000xbf16>, matmul_narrow_M = 1 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array<i64: 16, 16, 16>>
func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes {
hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}>
}{
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1000xbf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1000xbf16, #encoding>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1000xbf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1000xbf16, #encoding>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1000], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1000xbf16>> -> tensor<1x1000xbf16>
%3 = iree_encoding.set_encoding %2 : tensor<1x1000xbf16> -> tensor<1x1000xbf16, #encoding>
flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [1, 1000], strides = [1, 1] : tensor<1x1000xbf16, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<1x1000xbf16, #encoding>>
@@ -37,11 +35,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -51,8 +47,8 @@
hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}>
} {
%c0 = arith.constant 0 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x7xf32>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<7x7xf32, #encoding>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x7xf32>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<7x7xf32, #encoding>>
%14 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<7x7xf32>> -> tensor<7x7xf32>
%17 = iree_encoding.set_encoding %14 : tensor<7x7xf32> -> tensor<7x7xf32, #encoding>
flow.dispatch.tensor.store %17, %11, offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : tensor<7x7xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<7x7xf32, #encoding>>
@@ -69,11 +65,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -83,8 +77,8 @@
hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}>
} {
%c0 = arith.constant 0 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x32xf32, #encoding>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x32xf32, #encoding>>
%14 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>> -> tensor<128x80x32xf32>
%17 = iree_encoding.set_encoding %14 : tensor<128x80x32xf32> -> tensor<128x80x32xf32, #encoding>
flow.dispatch.tensor.store %17, %11, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1]
@@ -93,8 +87,8 @@
return
}
// CHECK-LABEL: func @set_encoding_128x80x32_batch_matmul_LHS(
-// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
-// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} !flow.dispatch.tensor<writeonly:tensor<128x10x32x8x1xf32>>
+// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
+// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor<writeonly:tensor<128x10x32x8x1xf32>>
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>> -> tensor<128x80x32xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x10x32x8x1xf32>
// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32>
@@ -102,11 +96,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -118,8 +110,8 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%5 = arith.index_castui %0 {stream.alignment = 64 : index} : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128x32x320xf32, #encoding>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128x32x320xf32, #encoding>>
%16 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>> -> tensor<128x32x320xf32>
%19 = iree_encoding.set_encoding %16 : tensor<128x32x320xf32> -> tensor<128x32x320xf32, #encoding>
flow.dispatch.tensor.store %19, %13, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1]
@@ -128,8 +120,8 @@
return
}
// CHECK-LABEL: func @set_encoding_128x32x320_batch_matmul_RHS(
-// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
-// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} !flow.dispatch.tensor<writeonly:tensor<128x40x32x8x1xf32>>
+// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
+// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor<writeonly:tensor<128x40x32x8x1xf32>>
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>> -> tensor<128x32x320xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x40x32x8x1xf32>
// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32>
@@ -137,11 +129,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -153,8 +143,8 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%3 = arith.index_castui %0 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x320xf32>>
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x320xf32, #encoding>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x320xf32>>
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x320xf32, #encoding>>
%10 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0], sizes = [128, 80, 320], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<128x80x320xf32, #encoding>>
-> tensor<128x80x320xf32, #encoding>
@@ -166,9 +156,9 @@
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0)
// CHECK: %[[CAST:.+]] = arith.index_castui %[[D0]] : i32 to index
-// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]])
+// CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]])
// CHECK-SAME: : !flow.dispatch.tensor<writeonly:tensor<128x80x320xf32>>
-// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[CAST]])
+// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[CAST]])
// CHECK-SAME: : !flow.dispatch.tensor<readonly:tensor<128x10x40x8x8xf32>>
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 40, 8, 8], strides = [1, 1, 1, 1, 1]
@@ -255,12 +245,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -275,11 +263,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -307,12 +295,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1]
@@ -352,12 +340,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -369,11 +355,11 @@
hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}>
} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<16x16xf32, #encoding_lhs>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<16x1xf32, #encoding_rhs>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<16x1xf32, #encoding_result>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<16x16xf32, #encoding_lhs>>
@@ -396,11 +382,11 @@
}
// CHECK-LABEL: func @matvec_lowering_f32f32f32_aarch64()
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<2x16x8x1xf32>>
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<1x16x1x1xf32>>
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<1x2x1x8xf32>>
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [2, 16, 8, 1], strides = [1, 1, 1, 1]
@@ -416,12 +402,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -436,11 +420,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf16, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
@@ -468,12 +452,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf16>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf16>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf16>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1]
@@ -489,12 +473,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -509,11 +491,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -542,12 +524,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP1]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x4x1xf32>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x4xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1]
@@ -563,12 +545,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -583,11 +563,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -615,12 +595,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 8, 1], strides = [1, 1, 1, 1]
@@ -636,12 +616,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -656,11 +634,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -688,12 +666,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1]
@@ -709,12 +687,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -729,11 +705,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
@@ -761,12 +737,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1]
@@ -782,12 +758,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -802,11 +776,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf16, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_lhs>>{%M, %K}
@@ -834,12 +808,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf16>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1]
@@ -855,12 +829,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -875,11 +847,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
@@ -907,12 +879,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xbf16>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xbf16>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1]
@@ -928,12 +900,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -948,11 +918,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xbf16, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
@@ -980,12 +950,12 @@
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xbf16>>{%[[TILED_M]], %[[K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x1xbf16>>{%[[TILED_N]], %[[K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xbf16>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[K]], 16, 1], strides = [1, 1, 1, 1]
@@ -1001,12 +971,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1021,11 +989,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
@@ -1055,12 +1023,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xbf16>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xbf16>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1]
@@ -1076,12 +1044,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1096,11 +1062,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xbf16, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xbf16, #encoding_lhs>>{%M, %K}
@@ -1130,12 +1096,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xbf16>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xbf16>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xbf16>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1]
@@ -1151,12 +1117,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1171,11 +1135,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf16, #encoding_result>>{%M, %N}
%lhs_f32 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -1213,9 +1177,9 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
// CHECK-DAG: %[[M_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[M]]]
// CHECK-DAG: %[[N_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[N]]]
-// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[M_CEILDIV_8]], %[[K]]}
-// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf16>>{%[[N_CEILDIV_8]], %[[K]]}
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf16>>{%[[M_CEILDIV_8]], %[[N_CEILDIV_8]]}
+// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf32>>{%[[M_CEILDIV_8]], %[[K]]}
+// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x8x1xf16>>{%[[N_CEILDIV_8]], %[[K]]}
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf16>>{%[[M_CEILDIV_8]], %[[N_CEILDIV_8]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_8]], %[[K]], 8, 1], {{.*}} -> tensor<?x?x8x1xf32>
// CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_8]], %[[K]], 8, 1], {{.*}} -> tensor<?x?x8x1xf16>
// CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_8]], %[[N_CEILDIV_8]], 8, 8], {{.*}} -> tensor<?x?x8x8xf16>
@@ -1226,12 +1190,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1246,11 +1208,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf16, #encoding_result>>{%M, %N}
%lhs_f32 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -1289,9 +1251,9 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
// CHECK-DAG: %[[M_CEILDIV_16:.+]] = affine.apply #[[$MAP_CEILDIV_16]]()[%[[M]]]
// CHECK-DAG: %[[N_CEILDIV_16:.+]] = affine.apply #[[$MAP_CEILDIV_16]]()[%[[N]]]
-// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%[[M_CEILDIV_16]], %[[K]]}
-// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[N_CEILDIV_16]], %[[K]]}
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf16>>{%[[M_CEILDIV_16]], %[[N_CEILDIV_16]]}
+// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%[[M_CEILDIV_16]], %[[K]]}
+// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf16>>{%[[N_CEILDIV_16]], %[[K]]}
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf16>>{%[[M_CEILDIV_16]], %[[N_CEILDIV_16]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_16]], %[[K]], 16, 1], {{.*}} -> tensor<?x?x16x1xf32>
// CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_16]], %[[K]], 16, 1], {{.*}} -> tensor<?x?x16x1xf16>
// CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M_CEILDIV_16]], %[[N_CEILDIV_16]], 16, 16], {{.*}} -> tensor<?x?x16x16xf16>
@@ -1302,12 +1264,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1322,11 +1282,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1352,11 +1312,11 @@
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%[[M]], %[[K]]}
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%[[K]], %[[N]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%[[M]], %[[N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0], sizes = [%[[M]], %[[K]]], strides = [1, 1]
@@ -1372,12 +1332,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1392,11 +1350,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1426,12 +1384,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1]
@@ -1447,12 +1405,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1467,11 +1423,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1500,12 +1456,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP0]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x8xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x8xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 8], strides = [1, 1, 1, 1]
@@ -1521,12 +1477,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1541,11 +1495,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi4, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1576,12 +1530,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x4x2xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP2]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xi4>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x4x16xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 4, 2], strides = [1, 1, 1, 1]
@@ -1597,12 +1551,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1617,11 +1569,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi4, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1650,12 +1602,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP0]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x8xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x8xi4>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 8], strides = [1, 1, 1, 1]
@@ -1671,12 +1623,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1691,11 +1641,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi4, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1726,12 +1676,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x4x16xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP2]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x16xi4>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x4x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 4, 16], strides = [1, 1, 1, 1]
@@ -1802,12 +1752,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1822,11 +1770,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1856,12 +1804,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1]
@@ -1877,12 +1825,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1897,11 +1843,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -1931,12 +1877,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x2xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x2xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 2], strides = [1, 1, 1, 1]
@@ -1952,12 +1898,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1972,11 +1916,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -2006,12 +1950,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1]
@@ -2027,12 +1971,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -2047,11 +1989,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -2081,12 +2023,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xi8>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x16x2xi8>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 16, 2], strides = [1, 1, 1, 1]
@@ -2160,12 +2102,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -2180,11 +2120,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi16, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi16, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi16, #encoding_lhs>>{%M, %K}
@@ -2214,12 +2154,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x2xi16>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x2xi16>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xi32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 2], strides = [1, 1, 1, 1]
@@ -2235,12 +2175,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -2255,11 +2193,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %lhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi16, #encoding_lhs>>{%M, %K}
- %rhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %rhs_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi4, #encoding_rhs>>{%K, %N}
- %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%lhs = flow.dispatch.tensor.load %lhs_binding, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi16, #encoding_lhs>>{%M, %K}
@@ -2297,9 +2235,9 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
// CHECK-DAG: %[[K_CEILDIV_8:.+]] = affine.apply #[[$MAP_CEILDIV_8]]()[%[[K]]]
// CHECK-DAG: %[[N_CEILDIV_32:.+]] = affine.apply #[[$MAP_CEILDIV_32]]()[%[[N]]]
-// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x1x8xi16>>{%[[M]], %[[K_CEILDIV_8]]}
-// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x32x8xi4>>{%[[N_CEILDIV_32]], %[[K_CEILDIV_8]]}
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x1x32xi32>>{%[[M]], %[[N_CEILDIV_32]]}
+// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x1x8xi16>>{%[[M]], %[[K_CEILDIV_8]]}
+// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<?x?x32x8xi4>>{%[[N_CEILDIV_32]], %[[K_CEILDIV_8]]}
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.*}} : !flow.dispatch.tensor<readwrite:tensor<?x?x1x32xi32>>{%[[M]], %[[N_CEILDIV_32]]}
// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M]], %[[K_CEILDIV_8]], 1, 8], {{.*}} -> tensor<?x?x1x8xi16>
// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[N_CEILDIV_32]], %[[K_CEILDIV_8]], 32, 8], {{.*}} -> tensor<?x?x32x8xi4>
// CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [%[[M]], %[[N_CEILDIV_32]], 1, 32], {{.*}} -> tensor<?x?x1x32xi32>
@@ -2792,13 +2730,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
#encoding_bcast = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array<i64: 16, 16, 16>>
@@ -2807,10 +2743,10 @@
} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x128x64xi8, #encoding>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x128x64xi8, #encoding>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
%7 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x128x64xi8, #encoding>> -> tensor<2x128x64xi8, #encoding>
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
%9 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
@@ -2851,11 +2787,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
#encoding_bcast = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
@@ -2864,8 +2798,8 @@
} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x64xf32, #encoding_bcast>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x64xf32, #encoding_bcast>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x64xf32, #encoding_bcast>> -> tensor<128x64xf32, #encoding_bcast>
%13 = tensor.empty() : tensor<2x128x64xf32, #encoding>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<128x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) {
@@ -2892,11 +2826,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
#encoding_bcast = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1)>, round_dims_to = array<i64: 16, 16, 16>>
@@ -2905,8 +2837,8 @@
} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x128xf32, #encoding_bcast>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x128xf32, #encoding_bcast>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x128xf32, #encoding_bcast>> -> tensor<2x128xf32, #encoding_bcast>
%13 = tensor.empty() : tensor<2x128x64xf32, #encoding>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x128xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) {
@@ -2933,11 +2865,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
#encoding_bcast = #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array<i64: 16, 16, 16>>
@@ -2946,8 +2876,8 @@
} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
%13 = tensor.empty() : tensor<2x128x64xf32, #encoding>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) {
@@ -2974,11 +2904,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array<i64: 16, 16, 16>>
#encoding_bcast = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<2x128x64xf32>, user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array<i64: 16, 16, 16>>
@@ -2987,8 +2915,8 @@
} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
%13 = tensor.empty() : tensor<2x128x64xf32, #encoding>
%14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%8 : tensor<2x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) {
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir
index 0464a42..10ceeaa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -20,11 +18,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xi8, #encoding_lhs>>{%M, %K}
@@ -55,17 +53,17 @@
// CHECK: %[[LHS_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor<?x?xi8, #iree_encoding.encoding<operand_index = 0 : i64, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 16, 16, 16>>> -> index, index
// CHECK-DAG: %[[LHS_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[M]], %[[LHS_TILE_SIZES]]#0]
// CHECK-DAG: %[[LHS_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[K]], %[[LHS_TILE_SIZES]]#1]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%[[LHS_OUTER_SIZE0]], %[[LHS_OUTER_SIZE1]], %[[LHS_TILE_SIZES]]#0, %[[LHS_TILE_SIZES]]#1}
// CHECK: %[[RHS_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor<?x?xi8, #iree_encoding.encoding<operand_index = 1 : i64, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 16, 16, 16>>> -> index, index
// CHECK-DAG: %[[RHS_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[N]], %[[RHS_TILE_SIZES]]#0]
// CHECK-DAG: %[[RHS_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[K]], %[[RHS_TILE_SIZES]]#1]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%[[RHS_OUTER_SIZE0]], %[[RHS_OUTER_SIZE1]], %[[RHS_TILE_SIZES]]#0, %[[RHS_TILE_SIZES]]#1}
// CHECK: %[[RESULT_TILE_SIZES:.+]]:2 = iree_codegen.query_tile_sizes tensor<?x?xi32, #iree_encoding.encoding<operand_index = 2 : i64, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 16, 16, 16>>> -> index, index
// CHECK-DAG: %[[RESULT_OUTER_SIZE0:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[M]], %[[RESULT_TILE_SIZES]]#0]
// CHECK-DAG: %[[RESULT_OUTER_SIZE1:.+]] = affine.apply #[[MAP_CEILDIV]]()[%[[N]], %[[RESULT_TILE_SIZES]]#1]
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xi32>>{%[[RESULT_OUTER_SIZE0]], %[[RESULT_OUTER_SIZE1]], %[[RESULT_TILE_SIZES]]#0, %[[RESULT_TILE_SIZES]]#1}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[LHS_OUTER_SIZE0]], %[[LHS_OUTER_SIZE1]], %[[LHS_TILE_SIZES]]#0, %[[LHS_TILE_SIZES]]#1], strides = [1, 1, 1, 1]
@@ -81,12 +79,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> ((3 ceildiv s0) * s0)>
#map1 = affine_map<()[s0] -> ((1 ceildiv s0) * s0)>
@@ -102,9 +98,9 @@
%c32_i64 = arith.constant 32 : i64
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x2xf32, #encoding_lhs>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x3xf32, #encoding_rhs>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x3xf32, #encoding_result>>{%arg4, %arg5}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x2xf32, #encoding_lhs>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x3xf32, #encoding_rhs>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x3xf32, #encoding_result>>{%arg4, %arg5}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x2xf32, #encoding_lhs>> -> tensor<1x2xf32, #encoding_lhs>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32, #encoding_rhs>> -> tensor<2x3xf32, #encoding_rhs>
%7 = tensor.empty() : tensor<1x3xf32, #encoding_result>
@@ -115,11 +111,11 @@
}
// CHECK: func.func @fill_matmul
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<1x1x8x4xf32>>
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<1x1x8x4xf32>>
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<1x1x8x8xf32>>
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [1, 1, 8, 4], strides = [1, 1, 1, 1]
@@ -137,11 +133,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -153,9 +147,9 @@
%c0 = arith.constant 0 : index
%d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32, #encoding_lhs>>{%d0, %d1}
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1} -> tensor<?x?xf32>
@@ -172,10 +166,10 @@
// CHECK-DAG: %[[CST:.+]] = arith.constant 0.0
// CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
-// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-DAG: %[[TILED_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]]
// CHECK-DAG: %[[TILED_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]]
-// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<writeonly:tensor<?x?x8x4xf32>>{%[[TILED_D0]], %[[TILED_D1]]}
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]]
// CHECK: %[[EMPTY:.+]] = tensor.empty
@@ -187,11 +181,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -204,9 +196,9 @@
%cst = arith.constant 0.000000e+00 : f32
%d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%d0, %d1}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%d0, %d1}
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%d0, %d1}
@@ -226,9 +218,9 @@
// CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[TILED_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]]
// CHECK-DAG: %[[TILED_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]]
-// CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xf32>>{%[[TILED_D0]], %[[TILED_D1]]}
-// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_D0]], %[[TILED_D1]], 8, 4], strides = [1, 1, 1, 1]
// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[D0]], %[[D1]])
@@ -238,12 +230,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -258,11 +248,11 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [%M, %K], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
@@ -292,12 +282,12 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[MAP0]]()[%[[M]]]
// CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[MAP1]]()[%[[K]]]
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xf32>>{%[[TILED_M]], %[[TILED_K]]}
// CHECK: %[[TILED_N:.+]] = affine.apply #[[MAP0]]()[%[[N]]]
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<?x?x8x4xf32>>{%[[TILED_N]], %[[TILED_K]]}
-// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: !flow.dispatch.tensor<readwrite:tensor<?x?x8x8xf32>>{%[[TILED_M]], %[[TILED_N]]}
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]
// CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4], strides = [1, 1, 1, 1]
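
For reference, the .mlir test updates in this diff all follow the same mechanical rewrite of the layout attribute and of `hal.interface.binding.subspan`. A minimal side-by-side sketch of the two forms, with placeholder names, counts, and types (the two halves are not meant to parse as one module):

  %c0 = arith.constant 0 : index

  // Old-style layout: a single descriptor set wrapping bindings with explicit ordinals.
  #layout_old = #hal.pipeline.layout<push_constants = 2, sets = [
    #hal.descriptor_set.layout<0, bindings = [
      #hal.descriptor_set.binding<0, storage_buffer>,
      #hal.descriptor_set.binding<1, storage_buffer>
    ]>
  ]>
  %a = hal.interface.binding.subspan layout(#layout_old) set(0) binding(1) alignment(64) offset(%c0) : memref<16xf32>

  // New-style layout: a flat binding list.
  #layout_new = #hal.pipeline.layout<constants = 2, bindings = [
    #hal.pipeline.binding<storage_buffer>,
    #hal.pipeline.binding<storage_buffer>
  ]>
  %b = hal.interface.binding.subspan layout(#layout_new) binding(1) alignment(64) offset(%c0) : memref<16xf32>
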
diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
index 5f25384..bc9420f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/ConvertBf16ToUInt16Buffers.cpp
@@ -92,10 +92,9 @@
auto newOp =
rewriter.replaceOpWithNewOp<IREE::HAL::InterfaceBindingSubspanOp>(
- op, newResultTy, adaptor.getLayout(), adaptor.getSet(),
- adaptor.getBinding(), adaptor.getByteOffset(),
- adaptor.getDynamicDims(), adaptor.getAlignmentAttr(),
- adaptor.getDescriptorFlagsAttr());
+ op, newResultTy, adaptor.getLayout(), adaptor.getBinding(),
+ adaptor.getByteOffset(), adaptor.getDynamicDims(),
+ adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr());
LLVM_DEBUG(llvm::dbgs() << "Bf16Emulation: new op: " << newOp << "\n");
(void)newOp;
return success();
diff --git a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
index e2c8805..772faf4 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/EmulateNarrowType.cpp
@@ -81,9 +81,9 @@
}
rewriter.replaceOpWithNewOp<IREE::HAL::InterfaceBindingSubspanOp>(
- op, newResultType, adaptor.getLayout(), adaptor.getSet(),
- adaptor.getBinding(), byteOffset, dynamicLinearizedSize,
- adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr());
+ op, newResultType, adaptor.getLayout(), adaptor.getBinding(),
+ byteOffset, dynamicLinearizedSize, adaptor.getAlignmentAttr(),
+ adaptor.getDescriptorFlagsAttr());
return success();
}
};
diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
index 2f7a39e..aa9e124 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -285,7 +285,7 @@
auto newOffset = rewriter.create<arith::ConstantIndexOp>(loc, 0);
auto newOp = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
- subspanOp.getLoc(), newType, subspanOp.getLayout(), subspanOp.getSet(),
+ subspanOp.getLoc(), newType, subspanOp.getLayout(),
subspanOp.getBinding(), newOffset, dynamicShape,
subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr());
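
The three C++ hunks above are signature-only updates: each rewriter recreates `InterfaceBindingSubspanOp` with the same layout, binding, byte offset, dynamic dims, alignment, and descriptor flags, just without a set operand. As a rough IR-level sketch of the bf16 case (the i16 storage type and the shape are illustrative assumptions, not taken from this diff):

  // Before ConvertBf16ToUInt16Buffers: a bf16 buffer bound through the set-less subspan form.
  %src = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128xbf16>
  // After the rewrite: same operands, integer storage type (assumed i16 here).
  %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128xi16>
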
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir
index 7c4cd2f..5bdfb14 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir
@@ -268,13 +268,6 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
#config = #iree_gpu.derived_thread_config
module {
func.func @inferred_im2col(%2: tensor<2x34x34x128xf16>, %3: tensor<2x128x8xf16>) -> tensor<2x128x8xf16>
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir
index 126c6f5..40ac0b7 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_create_fast_slow_path.mlir
@@ -1,22 +1,20 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-create-fast-slow-path))" --mlir-print-local-scope %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @padded_conv() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c32 = arith.constant 32 : index
%c112 = arith.constant 112 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x224x224x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x224x224x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir
index 0831e5b..7a3ab28 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-distribute, cse))" %s --split-input-file | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 256)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
@@ -15,11 +13,11 @@
%cst = arith.constant 0.000000e+00 : f32
%c64 = arith.constant 64 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %0, 64 : memref<233x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %1, 64 : memref<233x1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %2, 64 : memref<233x1024xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -51,12 +49,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 256)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
@@ -66,11 +62,11 @@
%cst = arith.constant 0.000000e+00 : f32
%c64 = arith.constant 64 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %0, 64 : memref<233x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %1, 64 : memref<233x1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<233x1024xf32>
memref.assume_alignment %2, 64 : memref<233x1024xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir
index 3535a86..b483dda 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pipeline.mlir
@@ -2,12 +2,10 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-pipelining{epilogue-peeling=false}))" --split-input-file %s | FileCheck %s
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-pipelining{pipeline-depth=3 schedule-index=2 epilogue-peeling=false}))" --split-input-file %s | FileCheck -check-prefix=CHECK-NV %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @_matmul_f16_f16_dispatch_0_fill_3456x1024() {
%c2048 = arith.constant 2048 : index
@@ -21,11 +19,11 @@
%3 = gpu.thread_id z
%4 = memref.alloc() : memref<4x32x40xf16, 3>
%5 = memref.alloc() : memref<4x32x40xf16, 3>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16>
memref.assume_alignment %6, 64 : memref<3456x2048xf16>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16>
memref.assume_alignment %7, 64 : memref<2048x1024xf16>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16>
memref.assume_alignment %8, 64 : memref<3456x1024xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -63,12 +61,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @nvidia_tenscore_schedule_f16() {
%c3 = arith.constant 3 : index
@@ -86,11 +82,11 @@
%alloc = memref.alloc() : memref<128x256xf16, #gpu.address_space<workgroup>>
%alloc_1 = memref.alloc() : memref<3x128x32xf16, #gpu.address_space<workgroup>>
%alloc_2 = memref.alloc() : memref<3x32x256xf16, #gpu.address_space<workgroup>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<512x1280xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<512x1280xf16>
memref.assume_alignment %3, 64 : memref<512x1280xf16>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<1280x1280xf16>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<1280x1280xf16>
memref.assume_alignment %4, 64 : memref<1280x1280xf16>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<512x1280xf16>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<512x1280xf16>
memref.assume_alignment %5, 64 : memref<512x1280xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -517,12 +513,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @nvidia_tenscore_schedule_f32() {
%c31 = arith.constant 31 : index
@@ -540,11 +534,11 @@
%alloc = memref.alloc() : memref<128x128xf32, #gpu.address_space<workgroup>>
%alloc_2 = memref.alloc() : memref<3x128x32xf32, #gpu.address_space<workgroup>>
%alloc_3 = memref.alloc() : memref<3x32x128xf32, #gpu.address_space<workgroup>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32>
memref.assume_alignment %3, 64 : memref<256x256xf32>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<256x256xf32>
memref.assume_alignment %4, 64 : memref<256x256xf32>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x256xf32>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x256xf32>
memref.assume_alignment %5, 64 : memref<256x256xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir
index 9377136..7a0334c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups.mlir
@@ -4,20 +4,18 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-reorder-workgroups{strategy=transpose}))" \
// RUN: --split-input-file %s | FileCheck --check-prefix=TRANSPOSE %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c96 = arith.constant 96 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96xf32>>
%3 = tensor.empty() : tensor<128x96xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir
index 04ffb5b..a07b26f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_reorder_workgroups_static.mlir
@@ -33,11 +33,9 @@
// TRANSPOSE-DAG: affine.apply #{{.+}}()[%[[REM]]]
// TRANSPOSE: return
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @main_dispatch_0 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -54,8 +52,8 @@
%c64 = arith.constant 64 : index
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32000x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32000x32000xf16>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%2 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_y]
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir
index 527c083..7598316 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_alloc.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt %s --allow-unregistered-dialect --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile-to-serial-loops,iree-codegen-gpu-tensor-alloc))" | FileCheck %s
// RUN: iree-opt %s --allow-unregistered-dialect --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile-to-serial-loops{coalesce-loops},iree-codegen-gpu-tensor-alloc))" | FileCheck %s --check-prefix=COALESCE_LOOPS
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_2048x512x1024() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y]
@@ -38,19 +36,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1x384x384() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x384xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x384xf32>> -> tensor<1x384xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%4 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%workgroup_id_x]
@@ -68,19 +64,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_multi_uses() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y]
@@ -107,12 +101,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_33x33x903168_f32() {
%c0 = arith.constant 0 : index
@@ -130,10 +122,10 @@
%5 = arith.index_castui %2 {stream.alignment = 4096 : index, stream.values = [1240289280 : index, 1789415424 : index]} : i32 to index
%6 = arith.index_castui %3 {stream.alignment = 8192 : index, stream.values = [633077760 : index, 752295936 : index]} : i32 to index
%7 = arith.index_castui %4 {stream.alignment = 64 : index, stream.values = [1486349952 : index, 1486358464 : index]} : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<33x903168xf32>>
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<903168x33xf32>>
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<33x33xf32>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<33x33xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<33x903168xf32>>
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<903168x33xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<33x33xf32>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<33x33xf32>>
%12 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_x]
%13 = flow.dispatch.tensor.load %11, offsets = [%12, 0], sizes = [32, 33], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<33x33xf32>> -> tensor<32x33xf32>
%14 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [903168, 33], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<903168x33xf32>> -> tensor<903168x33xf32>
@@ -160,23 +152,21 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @weight_dequant_matmul() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128x2048xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xi4>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128x2048xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xi4>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%5 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y]
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -235,19 +225,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv() attributes {translation_info = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [256, 1, 1] subgroup_size = 64, {mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, subgroup_m_count = 1, subgroup_n_count = 4>}>} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x1280x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32x32x1280xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x1280x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32x32x1280xf32>>
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir
index 364c25b..233c5b6 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tensor_tile.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tensor-tile, cse))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 256]]>
#map = affine_map<()[s0] -> (s0 * 256)>
@@ -14,9 +12,9 @@
module {
func.func @add_tensor() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<233x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<233x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<233x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<233x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<233x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<233x1024xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply #map()[%workgroup_id_x]
@@ -35,9 +33,9 @@
// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0 * 4)>
// CHECK-LABEL: func.func @add_tensor
-// CHECK-DAG: %[[A:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[C:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[A:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[C:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[LA:.*]] = flow.dispatch.tensor.load %[[A]]
// CHECK-DAG: %[[LB:.*]] = flow.dispatch.tensor.load %[[B]]
// CHECK-DAG: %[[LC:.*]] = flow.dispatch.tensor.load %[[C]]
@@ -58,11 +56,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 4]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -73,8 +69,8 @@
func.func @reduction() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%2 = affine.apply #map()[%workgroup_id_x]
%3 = flow.dispatch.tensor.load %1, offsets = [%2], sizes = [64], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<128xf32>> -> tensor<64xf32>
@@ -116,11 +112,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 4, 4]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -131,8 +125,8 @@
func.func @reduction_broadcast() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x32x10x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32x10x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x32x10x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32x10x4096xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%2 = affine.apply #map()[%workgroup_id_x]
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir
index 9cc4a19..6b15d52 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt -split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tile))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @innermost_reduction() {
%c1 = arith.constant 1 : index
@@ -16,9 +14,9 @@
%3 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 394752 : index, 984064 : index]} : i32 to index
%4 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [0 : index, 196608 : index, 197120 : index]} : i32 to index
%5 = arith.index_cast %2 {stream.alignment = 512 : index, stream.values = [512 : index, 197120 : index, 197632 : index]} : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%9 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%workgroup_id_x]
@@ -61,11 +59,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @has_scf_if() {
%c49152 = arith.constant 49152 : index
@@ -74,8 +70,8 @@
%c1023_i32 = arith.constant 1023 : i32
%c2_i32 = arith.constant 2 : i32
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<49152xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<49152xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<49152xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<49152xi32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%2 = affine.apply affine_map<()[s0] -> (s0 * 256)>()[%workgroup_id_x]
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
index 2cea3e2..ee1cadd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_tile_reduction.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-tile-reduction),canonicalize,cse)" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @warp_reduction_dispatch() {
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%2 = flow.dispatch.tensor.load %1, offsets = [%workgroup_id_x], sizes = [1], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<512xf32>> -> tensor<1xf32>
%3 = flow.dispatch.tensor.load %0, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>> -> tensor<1x10240xf32>
@@ -49,18 +47,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @warp_reduction_batch_matmul() {
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<11x512x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<11x512x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<11x512x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<11x512x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<11x512x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<11x512x512xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_id_z = hal.interface.workgroup.id[2] : index
@@ -95,16 +91,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @warp_reduction_broadcast_dispatch() {
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x10240xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%2 = flow.dispatch.tensor.load %1, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<512x10240xf32>> -> tensor<1x10240xf32>
%3 = flow.dispatch.tensor.load %0, offsets = [%workgroup_id_x, 0], sizes = [1, 10240], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>> -> tensor<1x10240xf32>
@@ -163,22 +157,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @warp_reduction_multi_reduction() {
%cst = arith.constant 0.000000e+00 : f32
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%15 = flow.dispatch.tensor.load %14, offsets = [%workgroup_id_x], sizes = [1], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<4096xf32>> -> tensor<1xf32>
%16 = flow.dispatch.tensor.load %13, offsets = [0, 0], sizes = [86, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<86x128xf32>> -> tensor<86x128xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
index bb3565f..a60528d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/transform_gpu_workgroup_swizzle.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt %s --iree-transform-dialect-interpreter -transform-dialect-drop-schedule --split-input-file | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c96 = arith.constant 96 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96xf32>>
%3 = tensor.empty() : tensor<128x96xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir
index a50b6ec..e365a2b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/vector_reduction_to_gpu.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline='builtin.module(func.func(iree-codegen-vector-reduction-to-gpu, cse))' %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline='builtin.module(func.func(iree-codegen-vector-reduction-to-gpu, cse))' %s | FileCheck %s --check-prefix=CDNA3
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [32, 1, 1] subgroup_size = 32>
@@ -17,8 +15,8 @@
%cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32>
%c32 = arith.constant 32 : index
%c384 = arith.constant 384 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x384xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x384xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%thread_id_x = gpu.thread_id x
%2 = affine.apply #map()[%thread_id_x, %workgroup_id_x]
@@ -75,12 +73,10 @@
// Make sure memref.load from uniform buffers are hoisted out as uniform code.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<uniform_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [32, 1, 1] subgroup_size = 32>
#map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)>
@@ -93,14 +89,14 @@
%cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32>
%c32 = arith.constant 32 : index
%c384 = arith.constant 384 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) offset(%c0) : memref<1xvector<4xi32>, #hal.descriptor_type<uniform_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) offset(%c0) : memref<1xvector<4xi32>, #hal.descriptor_type<uniform_buffer>>
%1 = memref.load %0[%c0] : memref<1xvector<4xi32>, #hal.descriptor_type<uniform_buffer>>
%2 = vector.extractelement %1[%c0 : index] : vector<4xi32>
%3 = vector.extractelement %1[%c1 : index] : vector<4xi32>
%4 = arith.index_castui %2 : i32 to index
%5 = arith.index_castui %3 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : memref<128x384xf32>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : memref<128xf32>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : memref<128x384xf32>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : memref<128xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%thread_id_x = gpu.thread_id x
%8 = affine.apply #map()[%thread_id_x, %workgroup_id_x]
@@ -119,14 +115,14 @@
// CHECK-LABEL: func.func @reduce_uniform_buffer_offset()
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[C0]]]
// CHECK: %[[EXT0:.+]] = vector.extractelement %[[LOAD]][%[[C0]] : index] : vector<4xi32>
// CHECK: %[[EXT1:.+]] = vector.extractelement %[[LOAD]][%[[C1]] : index] : vector<4xi32>
// CHECK: %[[OFFSET0:.+]] = arith.index_castui %[[EXT0]] : i32 to index
// CHECK: %[[OFFSET1:.+]] = arith.index_castui %[[EXT1]] : i32 to index
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET0]])
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET1]])
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET0]])
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET1]])
// CHECK: scf.for
// CHECK-COUNT-5: gpu.shuffle
// CHECK: arith.addf
@@ -136,12 +132,10 @@
// Make sure memref.load from readonly storage buffers are hoisted out as uniform code.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0, s1] -> (s1 * 2 + s0 floordiv 32)>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [32, 1, 1] subgroup_size = 32>
@@ -154,14 +148,14 @@
%cst_1 = arith.constant dense<3.840000e+02> : vector<1xf32>
%c32 = arith.constant 32 : index
%c384 = arith.constant 384 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<1xvector<4xi32>, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<1xvector<4xi32>, #hal.descriptor_type<storage_buffer>>
%1 = memref.load %0[%c0] : memref<1xvector<4xi32>, #hal.descriptor_type<storage_buffer>>
%2 = vector.extractelement %1[%c0 : index] : vector<4xi32>
%3 = vector.extractelement %1[%c1 : index] : vector<4xi32>
%4 = arith.index_castui %2 : i32 to index
%5 = arith.index_castui %3 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : memref<128x384xf32>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : memref<128xf32>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : memref<128x384xf32>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : memref<128xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%thread_id_x = gpu.thread_id x
%8 = affine.apply #map()[%thread_id_x, %workgroup_id_x]
@@ -180,14 +174,14 @@
// CHECK-LABEL: func.func @reduce_storage_buffer_offset()
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[C0]]]
// CHECK: %[[EXT0:.+]] = vector.extractelement %[[LOAD]][%[[C0]] : index] : vector<4xi32>
// CHECK: %[[EXT1:.+]] = vector.extractelement %[[LOAD]][%[[C1]] : index] : vector<4xi32>
// CHECK: %[[OFFSET0:.+]] = arith.index_castui %[[EXT0]] : i32 to index
// CHECK: %[[OFFSET1:.+]] = arith.index_castui %[[EXT1]] : i32 to index
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET0]])
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET1]])
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET0]])
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET1]])
// CHECK: scf.for
// CHECK-COUNT-5: gpu.shuffle
// CHECK: arith.addf
@@ -195,11 +189,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [32, 1, 1] subgroup_size = 32>
module {
@@ -208,8 +200,8 @@
%cst = arith.constant dense<0.000000e+00> : vector<1xf32>
%cst_0 = arith.constant 0.000000e+00 : f32
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128x32xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%alloc = memref.alloc() {alignment = 64 : i64} : memref<32xf32, #gpu.address_space<workgroup>>
%2 = vector.transfer_read %0[%workgroup_id_x, %c0], %cst_0 {in_bounds = [true]} : memref<128x32xf32>, vector<32xf32>
@@ -234,12 +226,10 @@
// Check that we multi-row matvec gets distributed across subgroup threads.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [64, 1, 1] subgroup_size = 64>
#map = affine_map<()[s0] -> (s0 * 4)>
@@ -253,11 +243,11 @@
%c512 = arith.constant 512 : index
%cst_1 = arith.constant 0.000000e+00 : f16
%thread_id_x = gpu.thread_id x
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x4096xf16, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x4096xf16, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<1x4096xf16, #hal.descriptor_type<storage_buffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<32000x4096xf16, #hal.descriptor_type<storage_buffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<32000x4096xf16, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %1, 64 : memref<32000x4096xf16, #hal.descriptor_type<storage_buffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<1x32000xf16, #hal.descriptor_type<storage_buffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<1x32000xf16, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %2, 64 : memref<1x32000xf16, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%3 = affine.apply #map()[%workgroup_id_x]
@@ -291,19 +281,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [32, 1, 1] subgroup_size = 32>
module {
func.func @simple_nd_write() attributes {translation_info = #translation_info} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<4x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<4x1024xf32>
%2 = vector.transfer_read %0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<4x1024xf32>, vector<4x1024xf32>
vector.transfer_write %2, %1[%c0, %c0] {in_bounds = [true, true]} : vector<4x1024xf32>, memref<4x1024xf32>
return
diff --git a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
index 7ae89bd..c45c62f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/IREEExpandStridedMetadata.cpp
@@ -156,7 +156,7 @@
// `hal.interface.binding.subspan` is
//
// ```mlir
- // hal.interface.binding.subspan layout(#pipeline_layout) set(0)
+ // hal.interface.binding.subspan layout(#pipeline_layout)
// binding(1) offset(%offset)
// : memref<?x?xf32, strided<[?, 1], offset: 64]>>{%s0, %s1}
// ```
@@ -167,7 +167,7 @@
// #map = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)>
// %linearSize = affine.apply #map()[%offset, %s0, %s1]
// %c0 = arith.constant 0 : index
- // hal.interface.binding.subspan layout(#pipeline_layout) set(0)
+ // hal.interface.binding.subspan layout(#pipeline_layout)
// binding(1) offset(%c0)
// : memref<?xf32>{%linearSize}
// ```
@@ -197,7 +197,7 @@
Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
auto linearInterfaceBinding =
rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
- loc, newBufferType, binding.getLayoutAttr(), binding.getSetAttr(),
+ loc, newBufferType, binding.getLayoutAttr(),
binding.getBindingAttr(), zero, dynamicLinearShape,
binding.getAlignmentAttr(), binding.getDescriptorFlagsAttr());
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
index e966749..2de3dc3 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
@@ -616,9 +616,9 @@
auto newResultType = IREE::Flow::DispatchTensorType::get(
resultType.getAccess(), convertedBoundType);
rewriter.replaceOpWithNewOp<IREE::HAL::InterfaceBindingSubspanOp>(
- subspanOp, newResultType, subspanOp.getLayout(), subspanOp.getSet(),
- subspanOp.getBinding(), subspanOp.getByteOffset(), newDynamicDims,
- subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr());
+ subspanOp, newResultType, subspanOp.getLayout(), subspanOp.getBinding(),
+ subspanOp.getByteOffset(), newDynamicDims, subspanOp.getAlignmentAttr(),
+ subspanOp.getDescriptorFlagsAttr());
return success();
}
};
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir b/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir
index cbbcde4..ab2a5bd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/bufferize_copy_only_dispatches.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-bufferize-copy-only-dispatches))" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 13, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 13, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_insert_slice() {
%slice_size = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -20,9 +18,9 @@
%dest_binding_size_x = hal.interface.constant.load layout(#pipeline_layout) ordinal(10) : index
%source_binding_size_y = hal.interface.constant.load layout(#pipeline_layout) ordinal(11) : index
%source_binding_size_x = hal.interface.constant.load layout(#pipeline_layout) ordinal(12) : index
- %source = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %source = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%source_binding_size_y, %source_binding_size_x}
- %dest = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %dest = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%dest_binding_size_y, %dest_binding_size_x}
%source_load = flow.dispatch.tensor.load %source, offsets = [%source_offset_y, %source_offset_x],
sizes = [1, %slice_size], strides = [%source_stride_y, %source_stride_x]
@@ -43,8 +41,8 @@
// CHECK-DAG: %[[SOURCE_OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(6)
// CHECK-DAG: %[[SOURCE_STRIDE_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(7)
// CHECK-DAG: %[[SOURCE_STRIDE_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(8)
-// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][%[[SOURCE_OFFSET_Y]], %[[SOURCE_OFFSET_X]]] [1, %[[SLICE_SIZE]]] [%[[SOURCE_STRIDE_Y]], %[[SOURCE_STRIDE_X]]]
// CHECK-DAG: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][%[[DEST_OFFSET_Y]], %[[DEST_OFFSET_X]]] [%[[SLICE_SIZE]], 1] [%[[DEST_STRIDE_Y]], %[[DEST_STRIDE_X]]]
// CHECK: linalg.generic
@@ -53,24 +51,22 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @UpSampling1D() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x16x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x8x3xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x16x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x8x3xf32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [2, 1, 3], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x8x3xf32>> -> tensor<2x3xf32>
flow.dispatch.tensor.store %2, %0, offsets = [0, 0, 0], sizes = [2, 1, 3], strides = [1, 1, 1] : tensor<2x3xf32> -> !flow.dispatch.tensor<readwrite:tensor<2x16x3xf32>>
return
}
// CHECK-LABEL: func.func @UpSampling1D()
-// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[SOURCE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SOURCE_SUBVIEW:.+]] = memref.subview %[[SOURCE]][0, 0, 0] [2, 1, 3]
// CHECK-DAG: %[[DEST_SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0, 0] [2, 1, 3]
// CHECK: linalg.generic
@@ -79,15 +75,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @concatenate_cst() {
%cst = arith.constant dense<0> : tensor<2x3xi32>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x5xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x5xi32>>
flow.dispatch.tensor.store %cst, %0, offsets = [0, 2], sizes = [2, 3], strides = [1, 1] : tensor<2x3xi32> -> !flow.dispatch.tensor<readwrite:tensor<2x5xi32>>
return
}
@@ -103,15 +97,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @already_bufferized() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1001xf32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1001xf32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<1001xf32, #hal.descriptor_type<storage_buffer>>
%alloc = memref.alloc() : memref<1001xf32>
linalg.fill ins(%cst : f32) outs(%alloc : memref<1001xf32>)
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir b/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir
index 023b33a..54819ce 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/canonicalize_interface_load_store.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-cleanup-buffer-alloc-view))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @fold_reshape_load()
func.func @fold_reshape_load() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
- // CHECK: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
+ // CHECK: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[ARG]], {{.*}} : !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>> -> tensor<3x3x96xf32>
%3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>> -> tensor<3x3x1x96xf32>
%4 = tensor.collapse_shape %3 [[0, 1, 2, 3]] : tensor<3x3x1x96xf32> into tensor<864xf32>
@@ -26,19 +24,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @fold_reshape_store()
func.func @fold_reshape_store() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
- // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x1x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
+ // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x1x96xf32>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %{{.*}}, {{.*}}
%3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>> -> tensor<3x3x1x96xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[LOAD]] : tensor<3x3x1x96xf32>)
@@ -52,10 +48,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @dont_fold_reshape_with_not_full_load()
func.func @dont_fold_reshape_with_not_full_load() {
@@ -63,8 +57,8 @@
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c96 = arith.constant 96 : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<6x3x1x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<6x3x1x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x3x96xf32>>
%3 = flow.dispatch.tensor.load %1, offsets = [%c3, %c0, %c0, %c0], sizes = [%c3, %c3, %c1, %c96], strides = [%c1, %c1, %c1, %c1] : !flow.dispatch.tensor<readonly:tensor<6x3x1x96xf32>> -> tensor<3x3x1x96xf32>
// CHECK: tensor.collapse_shape
// CHECK: tensor.expand_shape
@@ -76,10 +70,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @dont_fold_dynamic_reshape()
func.func @dont_fold_dynamic_reshape() {
@@ -88,8 +80,8 @@
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%dim2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x96xf32>>{%dim0, %dim1}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<?x12x8xf32>>{%dim2}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x96xf32>>{%dim0, %dim1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<?x12x8xf32>>{%dim2}
%3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0], sizes =[%dim0, %dim1, 96], strides=[1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x96xf32>>{%dim0, %dim1} -> tensor<?x?x96xf32>
// CHECK: tensor.collapse_shape
// CHECK: tensor.expand_shape
@@ -102,10 +94,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 288)>
// CHECK-LABEL: func.func @fold_reshape_slice_store
@@ -114,9 +104,9 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<1728xf32>>
- // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<6x3x1x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<1728xf32>>
+ // CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor<writeonly:tensor<6x3x1x96xf32>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %{{.*}}, {{.*}}
%3 = flow.dispatch.tensor.load %1, offsets=[0, 0, 0, 0], sizes =[3, 3, 1, 96], strides=[1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x1x96xf32>> -> tensor<3x3x1x96xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[LOAD]] : tensor<3x3x1x96xf32>)
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir
index 1f99752..7a4c471 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_bf16_to_uint16_buffers.mlir
@@ -1,27 +1,25 @@
// RUN: iree-opt --split-input-file \
// RUN: --iree-codegen-convert-bf16-to-uint16-buffers %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: @bf16_conversion
func.func @bf16_conversion() {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
- // CHECK-DAG: %[[BUF0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
- // CHECK-DAG: %[[BUF1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
- // CHECK-DAG: %[[BUF2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
+ // CHECK-DAG: %[[BUF0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
+ // CHECK-DAG: %[[BUF1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
+ // CHECK-DAG: %[[BUF2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref<?xi16, #spirv.storage_class<StorageBuffer>>{%c8}
// CHECK-DAG: %[[LOAD0:.+]] = memref.load %[[BUF0]][%arg0] : memref<?xi16, #spirv.storage_class<StorageBuffer>>
// CHECK-DAG: %[[LOAD1:.+]] = memref.load %[[BUF1]][%arg0] : memref<?xi16, #spirv.storage_class<StorageBuffer>>
// CHECK: memref.store %{{.+}}, %[[BUF2]][%arg0] : memref<?xi16, #spirv.storage_class<StorageBuffer>>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<?xbf16, #spirv.storage_class<StorageBuffer>>{%c8}
%3 = gpu.thread_id x
%4 = gpu.block_dim x
scf.for %arg0 = %3 to %c8 step %4 {
@@ -48,11 +46,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: @iree_uk_mmt4d
@@ -77,11 +73,11 @@
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x3x8x1xbf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<1x3x8x1xbf16>
memref.assume_alignment %0, 64 : memref<1x3x8x1xbf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>>
memref.assume_alignment %1, 64 : memref<1x3x8x1xbf16, strided<[24, 8, 1, 1], offset: 32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>>
memref.assume_alignment %2, 64 : memref<1x1x8x8xf32, strided<[64, 64, 8, 1], offset: 32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -131,10 +127,8 @@
// is rewritten correctly, along with any following ops.
// See issue https://github.com/iree-org/iree/issues/17177
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: module @extract_strided_metadata
@@ -144,7 +138,7 @@
func.func @external_func_entry_point() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>>
// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan {{.*}} : memref<1x8x768xi16,
%base_buffer, %offset, %sizes:3, %strides:3 = iree_codegen.extract_strided_metadata %2 : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> -> memref<bf16>, index, index, index, index, index, index, index
// CHECK: {{.+}} = iree_codegen.extract_strided_metadata %[[SUBSPAN]] : memref<1x8x768xi16,
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
index 1101469..a185567 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir
@@ -1,21 +1,19 @@
// RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-codegen-convert-to-destination-passing-style),canonicalize,cse)" --split-input-file | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul() {
%m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -40,10 +38,10 @@
return
}
// CHECK: func.func @matmul()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
@@ -56,12 +54,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_fill() {
%cst = arith.constant 0.0 : f32
@@ -69,9 +65,9 @@
%m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -97,9 +93,9 @@
return
}
// CHECK: func.func @matmul_fill()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
@@ -114,21 +110,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_inplace() {
%c0 = arith.constant 0 : index
%m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -153,9 +147,9 @@
return
}
// CHECK: func.func @matmul_inplace()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
@@ -168,11 +162,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reshape_simple() {
%c0 = arith.constant 0 : index
@@ -180,27 +172,25 @@
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c12 = arith.constant 12 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
return
}
// CHECK: func.func @reshape_simple()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]]
// CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]]
// CHECK: flow.dispatch.tensor.store %[[RESHAPE]], %[[RET0]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reshape_fused_source() {
%c0 = arith.constant 0 : index
@@ -208,8 +198,8 @@
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c12 = arith.constant 12 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
%4 = tensor.empty() : tensor<3x4xi32>
@@ -225,8 +215,8 @@
return
}
// CHECK: func.func @reshape_fused_source()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]]
// CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]]
// CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]]
@@ -237,12 +227,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reshape_fused_source_and_copyout() {
%c0 = arith.constant 0 : index
@@ -250,9 +238,9 @@
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c12 = arith.constant 12 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%4 = tensor.expand_shape %3 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
%5 = tensor.empty() : tensor<3x4xi32>
@@ -269,9 +257,9 @@
return
}
// CHECK: func.func @reshape_fused_source_and_copyout()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]]
// CHECK: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]]
// CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[SOURCE]]
@@ -283,11 +271,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reshape_fused_target() {
%c0 = arith.constant 0 : index
@@ -295,8 +281,8 @@
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c12 = arith.constant 12 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<12xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<12xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x4xi32>> -> tensor<3x4xi32>
%3 = tensor.empty() : tensor<3x4xi32>
%4 = linalg.generic {
@@ -312,8 +298,8 @@
return
}
// CHECK: func.func @reshape_fused_target()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SOURCE:.+]] = flow.dispatch.tensor.load %[[ARG0]]
// CHECK-DAG: %[[TARGET:.+]] = flow.dispatch.tensor.load %[[RET0]]
// CHECK-DAG: %[[RESHAPE_EXPAND:.+]] = tensor.expand_shape %[[TARGET]] {{\[}}[0, 1]{{\]}}
@@ -325,12 +311,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @cast_followed_by_store() {
%c0 = arith.constant 0 : index
@@ -339,9 +323,9 @@
%c64 = arith.constant 64 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x32x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x32x64xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -368,9 +352,9 @@
return
}
// CHECK: func.func @cast_followed_by_store()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
@@ -385,13 +369,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 12, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 12, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @multi_result() {
@@ -408,10 +390,10 @@
%dim5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
%dim6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
%dim7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%dim0, %dim1}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%dim2, %dim3}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%dim4, %dim5}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%dim6, %dim7}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%dim0, %dim1}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%dim2, %dim3}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%dim4, %dim5}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%dim6, %dim7}
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(9) : index
%6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(10) : index
@@ -448,10 +430,10 @@
return
}
// CHECK: func.func @multi_result()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RESULT0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RESULT1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RESULT0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RESULT1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK-DAG: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]]
@@ -466,12 +448,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unused_ins_operand() {
%c64 = arith.constant 64 : index
@@ -489,9 +469,9 @@
%9 = arith.index_cast %3 : i32 to index
%10 = arith.index_cast %4 : i32 to index
%11 = arith.index_cast %5 : i32 to index
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%6, %7, %8}
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c64) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%9, %10, %11}
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xi32>>{%9, %10, %8}
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%6, %7, %8}
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c64) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%9, %10, %11}
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xi32>>{%9, %10, %8}
%15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [%9, %10, %11], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%9, %10, %11} -> tensor<?x?x?xi32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -531,8 +511,8 @@
return
}
// CHECK-LABEL: func.func @unused_ins_operand()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[IN_VIEW:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[OUT_VIEW:.+]] = flow.dispatch.tensor.load %[[OUT]]
// CHECK: linalg.generic
@@ -541,17 +521,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @cumsum__2x2x2x2x2x2x2() {
%cst = arith.constant dense<0.000000e+00> : tensor<2x2x2x2x2x2x2xf32>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x2x2x2x2x2x2xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x2x2x2x2x2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x2x2x2x2x2x2xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x2x2x2x2x2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0, 0], sizes = [3, 2, 2, 2, 2, 2, 2], strides = [1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x2x2x2x2x2x2xf32>> -> tensor<3x2x2x2x2x2x2xf32>
%3 = tensor.empty() : tensor<2xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0 + d7, d1, d2, d3, d4, d5, d6)>,
@@ -577,17 +555,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reduce_window_max_4x6xf32() {
%cst = arith.constant dense<0xFF800000> : tensor<2x2xf32>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x6xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x6xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 4, 6], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x6xf32>> -> tensor<2x4x6xf32>
%3 = tensor.empty() : tensor<2x2x3xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d2, d0 * 2 + d3, d1 * 3 + d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%2, %3 : tensor<2x4x6xf32>, tensor<2x2x3xf32>) outs(%cst : tensor<2x2xf32>) {
@@ -607,14 +583,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sort1D() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4xi32>>
%1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<4xi32>> -> tensor<4xi32>
%2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<4xi32>) {
^bb0(%arg0: i32, %arg1: i32):
@@ -625,7 +599,7 @@
return
}
// CHECK: func.func @sort1D()
-// CHECK-DAG: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK-DAG: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[BUF]]
// CHECK: %[[SORT:.+]] = iree_linalg_ext.sort
// CHECK-SAME: outs(%[[IN]] : tensor<4xi32>)
@@ -633,18 +607,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @clone_index_computations() {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%4 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x]
@@ -664,31 +636,29 @@
return
}
// CHECK-LABEL: func @clone_index_computations()
-// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: scf.for
// CHECK: %[[TILESIZE:.+]] = affine.min
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[OUTPUT]], offsets = [{{.+}}], sizes = [%[[TILESIZE]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<5, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @gemm_gather() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<128xi32>>
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<128xi32>>
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1]
@@ -724,19 +694,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reduce_broadcast_generic() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<10x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<10x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<10x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<10x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets= [0, 0], sizes = [10, 1024], strides= [1, 1]
: !flow.dispatch.tensor<readonly:tensor<10x1024xf32>> -> tensor<10x1024xf32>
%4 = flow.dispatch.tensor.load %1, offsets= [0], sizes = [10], strides= [1]
@@ -767,7 +735,7 @@
return
}
// CHECK-LABEL: func @reduce_broadcast_generic
-// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]]
// CHECK: %[[RESULT:.+]]:2 = linalg.generic
// CHECK: linalg.generic
@@ -776,16 +744,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pack() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x2x2xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x2x2xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%3 = tensor.empty() : tensor<2x2x2x2xi32>
%pack = tensor.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32>
@@ -793,24 +759,22 @@
return
}
// CHECK-LABEL: func.func @pack
-// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]]
// CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]]
// CHECK: tensor.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unpack() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>> -> tensor<2x2x2x2xi32>
%3 = tensor.empty() : tensor<4x4xi32>
%4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32>
@@ -818,19 +782,17 @@
return
}
// CHECK-LABEL: func.func @unpack
-// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]]
// CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]]
// CHECK: tensor.unpack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -843,8 +805,8 @@
%0:2 = iree_codegen.query_tile_sizes tensor<16x16xi32, #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%1 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#0]
%2 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#1]
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c512) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%1, %2, %0#0, %0#1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xi32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c512) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%1, %2, %0#0, %0#1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xi32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -877,26 +839,24 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @multi_result_dispatches() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<120x240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<240x360xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<120xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<120x360xf32>>
- %30 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0)
+ %30 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<120x360xf32>>
%4 = tensor.empty() : tensor<120x360xf32>
%cst = arith.constant 0.0 : f32
@@ -925,12 +885,12 @@
return
}
// CHECK-LABEL: func @multi_result_dispatches()
-// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[BIAS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[BIAS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK-DAG: %[[RESULT0:.+]] = flow.dispatch.tensor.load %[[RESULT_BINDING0]]
-// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(4)
+// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(4)
// CHECK-DAG: %[[RESULT1:.+]] = flow.dispatch.tensor.load %[[RESULT_BINDING1]]
// CHECK: %[[FILL:.+]] = linalg.fill
// CHECK-SAME: outs(%[[RESULT1]] :
@@ -948,12 +908,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @if_conversion() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -961,11 +919,11 @@
%size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%cond = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i1
%result_offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
- %then = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %then = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0}
- %else = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %else = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%0}
%then_value = flow.dispatch.tensor.load %then, offsets = [%offset], sizes = [%size], strides = [1]
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0} -> tensor<?xf32>
@@ -985,9 +943,9 @@
// CHECK-DAG: %[[S1:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(2)
// CHECK-DAG: %[[COND:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(3)
// CHECK-DAG: %[[OFFSET:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(4)
-// CHECK-DAG: %[[THEN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ELSE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[THEN_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ELSE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[THEN:.+]] = flow.dispatch.tensor.load %[[THEN_BINDING]]
// CHECK-DAG: %[[ELSE:.+]] = flow.dispatch.tensor.load %[[ELSE_BINDING]]
// CHECK: scf.if %[[COND]] {
@@ -1003,11 +961,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @if_conversion_clone_offsets() {
%cst = arith.constant 0.000000e+00 : f32
@@ -1022,8 +978,8 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%6, %7}
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%8, %9}
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%6, %7}
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%8, %9}
%12 = affine.apply affine_map<()[s0, s1] -> (-s0 + s1 + (s0 ceildiv 16) * 16)>()[%6, %6]
%13 = affine.apply affine_map<()[s0, s1] -> (-s0 + s1 + (s0 ceildiv 16) * 16)>()[%7, %7]
%workgroup_id_x = hal.interface.workgroup.id[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir
index 46f30fe..771ef0a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/convolution_to_igemm.mlir
@@ -31,21 +31,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{thread = [2, 16], subgroup = [2, 16]}>
#map = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @fold_with_interface_tensor() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x16x16x4xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x4x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x14x14x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x16x16x4xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x4x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x14x14x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 16, 16, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x16x16x4xf32>> -> tensor<1x16x16x4xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 4, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x4x16xf32>> -> tensor<3x3x4x16xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<1x14x14x16xf32>> -> tensor<1x14x14x16xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir
index 38ee86b..d39c341 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_conv2d.mlir
@@ -3,19 +3,17 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0, 0, 0, 0], [1, 1, 1, 4, 0, 0], [0, 0, 0, 0, 1, 4], [0, 0, 0, 0, 0, 0]]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
#translation = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
module {
func.func @restrict_num_workgroups() attributes {hal.executable.target = #executable_target_system_elf_arm_64_, translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x4xf32>>
%input = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 1, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>> -> tensor<1x1x4x4xf32>
%filter = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 4, 4], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>> -> tensor<1x4x4xf32>
%5 = tensor.empty() : tensor<1x1x1x4xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir b/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir
index 45301a4..f603d39 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/eliminate_empty_tensors.mlir
@@ -2,17 +2,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @eliminate_empty_tensors_with_store_op() {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c32 = arith.constant 32 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
%1 = tensor.empty() : tensor<32x384xf32>
scf.for %arg0 = %c0 to %c128 step %c32 {
%2 = scf.for %arg1 = %c0 to %c32 step %c8 iter_args(%arg2 = %1) -> (tensor<32x384xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir b/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir
index 84f84cc..f6b2ee2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/emulate_narrow_type.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-codegen-emulate-narrow-type %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @memref_i4_to_i8() -> i4 {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x15xi4>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x15xi4>
%1 = memref.load %0[%c0, %c0] : memref<3x15xi4>
return %1 : i4
}
@@ -16,14 +14,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @memref_i4_to_i8_dynamic(%arg0 : index, %arg1 : index, %arg2 : index) -> i4 {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) flags(ReadOnly) : memref<?x?xi4, strided<[?, 1], offset: ?>>{%arg1, %arg2}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) flags(ReadOnly) : memref<?x?xi4, strided<[?, 1], offset: ?>>{%arg1, %arg2}
%1 = memref.load %0[%c0, %c0] : memref<?x?xi4, strided<[?, 1], offset: ?>>
return %1 : i4
}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir b/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir
index 48c1922..bb3e4bd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/flatten_memref_subspan.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --iree-codegen-flatten-memref-subspan --canonicalize --allow-unregistered-dialect %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_subspan_with_offset(%offset : index, %i0: index, %i1: index, %i2: index) -> f32 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>>
%val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xf32, strided<[56, 8, 1], offset: ?>>
return %val: f32
}
@@ -17,20 +15,18 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index)
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]]
// CHECK: return %[[LOAD]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @store_subspan_with_offset(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>>
memref.store %value, %subspan[%i0, %i1, %i2] : memref<2x3x4xf32, strided<[12, 4, 1], offset: ?>>
return
}
@@ -41,19 +37,17 @@
// CHECK-SAME: (%[[VALUE:.+]]: f32, %[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index)
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: memref.store %[[VALUE]], %[[SUBSPAN]][%[[INDEX]]] : memref<?xf32>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_subspan_with_vector_element(%offset : index, %i0: index, %i1: index, %i2: index) -> vector<4xf32> {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>>
%val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xvector<4xf32>, strided<[56, 8, 1], offset:?>>
return %val: vector<4xf32>
}
@@ -64,13 +58,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_subspan_with_16bit_element(%offset : index, %i0: index, %i1: index, %i2: index) -> f16 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>>
%val = memref.load %subspan[%i0, %i1, %i2] : memref<6x7x8xf16, strided<[56, 8, 1], offset:?>>
return %val: f16
}
@@ -81,15 +73,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @store_subspan_with_leading_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) {
%dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>{%dim}
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>{%dim}
memref.store %value, %subspan[%i0, %i1, %i2] : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>
return
}
@@ -101,23 +91,21 @@
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index
// CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM]], %[[OFFSET]]]
-// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] : memref<?xf32>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @store_subspan_with_all_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index, %i3: index) {
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%dim2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%dim3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<?x?x?x?xf32, strided<[?, ?, ?, 1], offset: ?>>{%dim0, %dim1, %dim2, %dim3}
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<?x?x?x?xf32, strided<[?, ?, ?, 1], offset: ?>>{%dim0, %dim1, %dim2, %dim3}
memref.store %value, %subspan[%i0, %i1, %i2, %i3] : memref<?x?x?x?xf32, strided<[?, ?, ?, 1], offset: ?>>
return
}
@@ -132,21 +120,19 @@
// CHECK: %[[DIM2:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
// CHECK: %[[DIM3:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index
// CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM0]], %[[DIM1]], %[[DIM2]], %[[DIM3]], %[[OFFSET]]]
-// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[DIM3]], %[[I3]], %[[DIM2]], %[[I2]], %[[I0]], %[[DIM1]], %[[I1]]]
// CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @store_subspan_with_mixed_dynamic_dim(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index, %i3: index) {
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<?x4x?x8xf32, strided<[?, ?, 8, 1], offset: ?>>{%dim0, %dim1}
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<?x4x?x8xf32, strided<[?, ?, 8, 1], offset: ?>>{%dim0, %dim1}
memref.store %value, %subspan[%i0, %i1, %i2, %i3] : memref<?x4x?x8xf32, strided<[?, ?, 8, 1], offset: ?>>
return
}
@@ -159,20 +145,18 @@
// CHECK: %[[DIM0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index
// CHECK: %[[DIM2:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) : index
// CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM0]], %[[DIM2]], %[[OFFSET]]]
-// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I3]], %[[DIM2]], %[[I2]], %[[I0]], %[[I1]]]
// CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @store_subspan_with_flow_control(%value: f32, %offset : index, %i0: index, %i1: index, %i2: index) {
%dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>{%dim}
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>{%dim}
scf.for %i = %i0 to %i1 step %i2 {
memref.store %value, %subspan[%i0, %i1, %i2] : memref<?x3x4xf32, strided<[12, 4, 1], offset:?>>
}
@@ -186,7 +170,7 @@
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index
// CHECK: %[[SIZE:.+]] = affine.apply #[[$SIZE_MAP]]()[%[[DIM]], %[[OFFSET]]]
-// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: scf.for
// CHECK: %[[INDEX:.+]] = affine.apply #[[$OFFSET_MAP]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: memref.store %[[VALUE]], %[[DST]][%[[INDEX]]] : memref<?xf32>
@@ -256,14 +240,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_read_subspan_with_offset(
%arg0 : index, %arg1: index, %arg2: index, %arg3: index) -> vector<4xf32> {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
%cst = arith.constant 0.0 : f32
%val = vector.transfer_read %subspan[%arg1, %arg2, %arg3], %cst {in_bounds = [true]} : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>, vector<4xf32>
return %val: vector<4xf32>
@@ -278,21 +260,19 @@
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]]]
-// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]]
// CHECK: %[[VEC:.+]] = vector.transfer_read %[[MEMREF]][%[[INDEX]]]
// CHECK: return %[[VEC]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_write_subspan_with_offset(
%arg0 : index, %arg1: index, %arg2: index, %arg3: index, %arg4 : vector<4xf32>) {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%arg0) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
vector.transfer_write %arg4, %subspan[%arg1, %arg2, %arg3] {in_bounds = [true]} : vector<4xf32>, memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
return
}
@@ -307,21 +287,19 @@
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: vector<4xf32>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]]]
-// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[MEMREF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]]
// CHECK: vector.transfer_write %[[ARG4]], %[[MEMREF]][%[[INDEX]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_store_subspan_with_zero_offset(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
- %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xf32>{%arg0, %arg1}
- %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xf32>{%arg0, %arg1}
+ %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xf32>{%arg0, %arg1}
+ %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xf32>{%arg0, %arg1}
%val = memref.load %subspan0[%arg2, %arg3] : memref<?x?xf32>
memref.store %val, %subspan1[%arg2, %arg3] : memref<?x?xf32>
return
@@ -335,9 +313,9 @@
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[D0:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]], %[[ARG1]]]
-// CHECK: %[[BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[D0]]}
+// CHECK: %[[BINDING0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[D0]]}
// CHECK: %[[D1:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]], %[[ARG1]]]
-// CHECK: %[[BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref<?xf32>{%[[D1]]}
+// CHECK: %[[BINDING1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref<?xf32>{%[[D1]]}
// CHECK: %[[OFFSET0:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
// CHECK: %[[VAL:.+]] = memref.load %[[BINDING0]][%[[OFFSET0]]]
// CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
@@ -345,16 +323,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_store_rank_zero_subspan_with_zero_offset() {
%zero = arith.constant 0 : index
- %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%zero) : memref<f32>
- %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%zero) : memref<f32>
+ %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%zero) : memref<f32>
+ %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%zero) : memref<f32>
%val = memref.load %subspan0[] : memref<f32>
memref.store %val, %subspan1[] : memref<f32>
return
@@ -362,20 +338,18 @@
//CHECK-LABEL: func.func @load_store_rank_zero_subspan_with_zero_offset
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<f32>
-// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref<f32>
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<f32>
+// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref<f32>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_store_rank_zero_subspan_with_offset(%offset : index) {
- %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<f32, strided<[], offset:?>>
- %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref<f32, strided<[], offset:?>>
+ %subspan0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<f32, strided<[], offset:?>>
+ %subspan1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref<f32, strided<[], offset:?>>
%val = memref.load %subspan0[] : memref<f32, strided<[], offset:?>>
memref.store %val, %subspan1[] : memref<f32, strided<[], offset:?>>
return
@@ -387,9 +361,9 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE0:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE0]]}
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE0]]}
// CHECK-DAG: %[[SIZE1:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) offset(%[[C0]]) : memref<?xf32>{%[[SIZE1]]}
+// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) offset(%[[C0]]) : memref<?xf32>{%[[SIZE1]]}
// CHECK: %[[INDEX0:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]]
// CHECK: %[[LOAD:.+]] = memref.load %[[SPAN0]][%[[INDEX0]]] : memref<?xf32>
// CHECK: %[[INDEX1:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]]
@@ -397,13 +371,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @collapse_shape(%offset : index, %i0 : index, %i1 : index) -> f32 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>>
%collapse = memref.collapse_shape %subspan[[0, 1], [2, 3]] : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>> into memref<20x42xf32, strided<[42, 1], offset:?>>
%value = memref.load %collapse[%i0, %i1] : memref<20x42xf32, strided<[42, 1], offset:?>>
return %value : f32
@@ -415,19 +387,17 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]]
// CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @expand_shape(%offset : index, %i0: index, %i1: index, %i2: index, %i3: index) -> f32 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<20x42xf32, strided<[42, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<20x42xf32, strided<[42, 1], offset:?>>
%expand = memref.expand_shape %subspan[[0, 1], [2, 3]] output_shape [4, 5, 6, 7] : memref<20x42xf32, strided<[42, 1], offset:?>> into memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>>
%value = memref.load %expand[%i0, %i1, %i2, %i3] : memref<4x5x6x7xf32, strided<[210, 42, 7, 1], offset:?>>
return %value : f32
@@ -439,19 +409,17 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index, %[[I3:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]], %[[I3]]]
// CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @expand_shape2(%offset : index, %i0: index, %i1: index) -> f32 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>>
%expand = memref.expand_shape %subspan [[0, 1]] output_shape [1, 128] : memref<128xf32, strided<[1], offset: ?>> into memref<1x128xf32, strided<[128, 1], offset: ?>>
%value = memref.load %expand[%i0, %i1] : memref<1x128xf32, strided<[128, 1], offset: ?>>
return %value : f32
@@ -463,7 +431,7 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]]
// CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]]
@@ -473,13 +441,11 @@
// be able to do so (a memref cast is inserted to move between unknown and
// known dim).
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_collapse_shape_to_1d_static(%offset : index, %i: index) {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>>
%collapse = memref.collapse_shape %subspan [[0, 1, 2]] : memref<6x7x8xf32, strided<[56, 8, 1], offset:?>> into memref<336xf32, strided<[1], offset: ?>>
"unregistered.opaque"(%collapse) : (memref<336xf32, strided<[1], offset: ?>>) -> ()
}
@@ -491,20 +457,18 @@
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[OFFSET:.+]] = affine.apply #[[$MAP0]]()[%[[ARG0]]
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[ARG0]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[SUBSPAN]][%[[OFFSET]]] [336] [1] : memref<?xf32> to memref<336xf32, strided<[1], offset: ?>>
// CHECK: "unregistered.opaque"(%[[SUBVIEW]])
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @subview(%offset : index, %i0: index, %i1: index) -> f32 {
%c0 = arith.constant 0 : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x128xf32, strided<[128, 1], offset: ?>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x128xf32, strided<[128, 1], offset: ?>>
%expand = memref.subview %subspan[%i0, %i1][16, 8][1, 1] : memref<32x128xf32, strided<[128, 1], offset: ?>> to memref<16x8xf32, strided<[128, 1], offset: ?>>
%value = memref.load %expand[%c0, %c0] : memref<16x8xf32, strided<[128, 1], offset: ?>>
return %value : f32
@@ -516,7 +480,7 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xf32>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]]]
// CHECK: memref.load %[[SUBSPAN]][%[[INDEX]]]
@@ -553,13 +517,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @subgroup_mma_load_with_offset(%offset : index, %i0: index, %i1: index) -> !gpu.mma_matrix<16x16xf16, "AOp"> {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3>
%0 = gpu.subgroup_mma_load_matrix %subspan[%i0, %i1] {leadDimension = 32 : index} : memref<32x32xf16, strided<[32, 1], offset: ?>, 3> -> !gpu.mma_matrix<16x16xf16, "AOp">
return %0 : !gpu.mma_matrix<16x16xf16, "AOp">
}
@@ -570,20 +532,18 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index)
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref<?xf16, 3>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref<?xf16, 3>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP2]]()[%[[OFFSET]], %[[I0]], %[[I1]]]
// CHECK: %[[LD:.+]] = gpu.subgroup_mma_load_matrix %[[SUBSPAN]][%[[INDEX]]] {leadDimension = 32 : index}
// CHECK: return %[[LD]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @subgroup_mma_store_with_offset(%offset : index, %i0: index, %i1: index, %val: !gpu.mma_matrix<16x16xf16, "COp">) {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<32x32xf16, strided<[32, 1], offset: ?>, 3>
gpu.subgroup_mma_store_matrix %val, %subspan[%i0, %i1] {leadDimension = 128 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16, strided<[32, 1], offset: ?>, 3>
return
}
@@ -594,19 +554,17 @@
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[VAL:.+]]: !gpu.mma_matrix<16x16xf16, "COp">
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[ZERO]]) : memref<?xf16, 3>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[ZERO]]) : memref<?xf16, 3>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP2]]()[%[[OFFSET]], %[[I0]], %[[I1]]]
// CHECK: gpu.subgroup_mma_store_matrix %[[VAL]], %[[SUBSPAN]][%[[INDEX]]] {leadDimension = 128 : index}
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @load_uniform_buffer(%offset: index, %i0: index, %i1 : index, %i2: index) -> i32 {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
%val = memref.load %subspan[%i0, %i1, %i2] : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
return %val: i32
}
@@ -615,7 +573,7 @@
// CHECK-LABEL: func.func @load_uniform_buffer
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index)
// CHECK: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xi32, #hal.descriptor_type<uniform_buffer>>
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xi32, #hal.descriptor_type<uniform_buffer>>
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: %[[LD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<?xi32, #hal.descriptor_type<uniform_buffer>>
// CHECK: return %[[LD]] : i32
@@ -623,13 +581,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @store_uniform_buffer(%value : i32, %offset: index, %i0: index, %i1 : index, %i2: index) {
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
memref.store %value, %subspan[%i0, %i1, %i2] : memref<2x3x4xi32, strided<[12, 4, 1], offset:?>, #hal.descriptor_type<uniform_buffer>>
return
}
@@ -640,21 +596,19 @@
// CHECK-SAME: (%[[VAL:.+]]: i32, %[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index, %[[I2:.+]]: index)
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[SIZE:.+]] = affine.apply #[[$MAP0]]()[%[[OFFSET]]]
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%[[C0]]) : memref<?xi32, #hal.descriptor_type<uniform_buffer>>{%[[SIZE]]}
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%[[C0]]) : memref<?xi32, #hal.descriptor_type<uniform_buffer>>{%[[SIZE]]}
// CHECK: %[[INDEX:.+]] = affine.apply #[[$MAP1]]()[%[[OFFSET]], %[[I0]], %[[I1]], %[[I2]]]
// CHECK: memref.store %[[VAL]], %[[SUBSPAN]][%[[INDEX]]] : memref<?xi32, #hal.descriptor_type<uniform_buffer>>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reinterpret_cast_lowering_static_zero_offset() -> f32 {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xf32>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xf32>{%0, %1}
%3 = memref.reinterpret_cast %2 to offset: [0], sizes: [], strides: [] : memref<?x?xf32> to memref<f32>
%4 = memref.load %3[] : memref<f32>
return %4 : f32
@@ -664,16 +618,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reinterpret_cast_lowering_dynamic_zero_offset() -> f32 {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xf32>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xf32>{%0, %1}
%3 = memref.reinterpret_cast %2 to offset: [%c0], sizes: [], strides: [] : memref<?x?xf32> to memref<f32>
%4 = memref.load %3[] : memref<f32>
return %4 : f32
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir
index b2529d6..6139480 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/fold_affine_min_of_block_id.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-fold-affinemin-in-distributed-loops, canonicalize)))))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @generic_static {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -23,8 +21,8 @@
// CHECK:} -> tensor<32x32xf32>
// CHECK: flow.dispatch.tensor.store {{.*}} sizes = [32, 32], strides = [1, 1] : tensor<32x32xf32> -> !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%2 = affine.min affine_map<()[s0] -> (32, s0 * -32 + 4096)>()[%workgroup_id_y]
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir b/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir
index 2e1ab03..594b8ee 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/hoist_unrolled_vector_extract_insert_slice.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-hoist-vector-extract-insert-slice))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @hoist_unrolled_vector_for_mma() {
%c0 = arith.constant 0 : index
@@ -13,11 +11,11 @@
%cst_0 = arith.constant dense<0.000000e+00> : vector<32x32xf32>
%c64 = arith.constant 64 : index
%c2048 = arith.constant 2048 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3456x2048xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<3456x2048xf16>
memref.assume_alignment %0, 64 : memref<3456x2048xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<2048x1024xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<2048x1024xf16>
memref.assume_alignment %1, 64 : memref<2048x1024xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf32>
memref.assume_alignment %2, 64 : memref<3456x1024xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%3 = gpu.thread_id x
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
index c96a6db..fbe6b45 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir
@@ -1,22 +1,20 @@
// RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-codegen-iree-comprehensive-bufferize, canonicalize, cse, canonicalize))" --split-input-file | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul() {
%c0 = arith.constant 0 : index
%m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -47,10 +45,10 @@
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1]
// CHECK-DAG: %[[WG_COUNT_Y:.+]] = hal.interface.workgroup.count[1]
// CHECK-DAG: %[[WG_SIZE_Y:.+]] = hal.interface.workgroup.size[1]
@@ -78,12 +76,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_fill() {
%cst = arith.constant 0.0 : f32
@@ -94,9 +90,9 @@
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%base_offset_i32 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) alignment(8) : i32
%base_offset = arith.index_castui %base_offset_i32 : i32 to index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%base_offset) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c1024) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%base_offset) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c1024) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -131,11 +127,11 @@
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[BASE_OFFSET_I32:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3)
// CHECK-DAG: %[[BASE_OFFSET:.+]] = arith.index_castui %[[BASE_OFFSET_I32]]
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(32)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(32)
// CHECK-DAG: memref.assume_alignment %[[LHS]], 32
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[BASE_OFFSET]])
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[BASE_OFFSET]])
// CHECK-DAG: memref.assume_alignment %[[RHS]], 8
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c1024)
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c1024)
// CHECK-DAG: memref.assume_alignment %[[RESULT]], 64
// CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1]
// CHECK-DAG: %[[WG_COUNT_Y:.+]] = hal.interface.workgroup.count[1]
@@ -164,11 +160,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @elementwise() {
%c4 = arith.constant 4 : index
@@ -177,8 +171,8 @@
%c512 = arith.constant 512 : index
%c64 = arith.constant 64 : index
%c10 = arith.constant 10 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c512) : !flow.dispatch.tensor<readonly:tensor<1x10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c64) : !flow.dispatch.tensor<writeonly:tensor<1x10xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c512) : !flow.dispatch.tensor<readonly:tensor<1x10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c64) : !flow.dispatch.tensor<writeonly:tensor<1x10xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%2 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x]
@@ -213,8 +207,8 @@
// CHECK: func.func @elementwise()
// CHECK-DAG: %[[CST_TENSOR:.+]] = arith.constant dense_resource<__elided__> : tensor<1x10xf32>
// CHECK-DAG: %[[CST_BUF:.+]] = bufferization.to_memref %[[CST_TENSOR]]
-// CHECK-DAG: %[[IN_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 128>, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 16>, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[IN_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 128>, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : memref<1x10xf32, strided<[10, 1], offset: 16>, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for
// CHECK-DAG: %[[SUB_IN1:.+]] = memref.subview %[[IN_BUF]]
// CHECK-DAG: %[[SUB_OUT1:.+]] = memref.subview %[[OUT_BUF]]
@@ -228,18 +222,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 2)>
#map1 = affine_map<(d0) -> (d0)>
func.func @rank_reduced_slice() {
%c10 = arith.constant 10 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x40xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<10xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x40xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<10xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%3 = affine.apply #map0()[%workgroup_id_x]
@@ -257,8 +249,8 @@
return
}
// CHECK: func.func @rank_reduced_slice()
-// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1x40xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<10xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1x40xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<10xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for %[[IV0:.+]] =
// CHECK-DAG: %[[SRC_SUBVIEW:.+]] = memref.subview %[[SRC_BINDING]][0, %[[IV0]]] [1, 2] [1, 1] : memref<1x40xf32{{.+}}> to memref<2xf32
// CHECK-DAG: %[[DST_SUBVIEW:.+]] = memref.subview %[[DST_BINDING]][%[[IV0]]] [2] [1] : memref<10xf32{{.+}}> to memref<2xf32
@@ -271,11 +263,9 @@
// Checks that there are no errors in early bufferized copy ops. The
// bufferization pass should make it as it is.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @early_bufferized_copy_cst_ops() {
%c0 = arith.constant 0 : index
@@ -283,9 +273,9 @@
%c2 = arith.constant 2 : index
%cst = arith.constant dense<0> : tensor<2x3xi32>
%0 = bufferization.to_memref %cst : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2x5xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2x5xi32>
memref.assume_alignment %1, 64 : memref<2x5xi32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x5xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x5xi32>>
%3 = memref.subview %1[%c0, %c2] [2, 3] [%c1, %c1] : memref<2x5xi32> to memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>
linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) outs(%3 : memref<2x3xi32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>) {
^bb0(%arg0: i32, %arg1: i32):
@@ -299,12 +289,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tile_from_tensor_load_inplace() {
%c2 = arith.constant 2 : index
@@ -313,9 +301,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -331,9 +319,9 @@
}
// CHECK-LABEL: func.func @tile_from_tensor_load_inplace()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1]
@@ -345,13 +333,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tile_from_tensor_load_inplace_and_copy() {
%c2 = arith.constant 2 : index
@@ -360,10 +346,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -380,10 +366,10 @@
}
// CHECK-LABEL: func.func @tile_from_tensor_load_inplace_and_copy()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1]
@@ -397,12 +383,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @tile_from_pointwise_lhs_inplace() {
@@ -412,9 +396,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -436,9 +420,9 @@
}
// CHECK-LABEL: func.func @tile_from_pointwise_lhs_inplace()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[LHS:.+]] = memref.subview %[[TENSOR_LHS]][%[[IV0]], 0] [1, 3] [1, 1]
@@ -454,13 +438,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @tile_from_pointwise_outs() {
@@ -470,10 +452,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -494,10 +476,10 @@
return
}
// CHECK-LABEL: func.func @tile_from_pointwise_outs()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[TENSOR_INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1]
@@ -513,12 +495,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @tile_from_pointwise_outs_inplace() {
@@ -529,9 +509,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -552,9 +532,9 @@
}
// CHECK-LABEL: func.func @tile_from_pointwise_outs_inplace()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1]
@@ -568,12 +548,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tile_from_matmul_outs_inplace() {
%c2 = arith.constant 2 : index
@@ -582,9 +560,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
scf.for %arg0 = %workgroup_id_y to %c2 step %c2 {
@@ -601,9 +579,9 @@
}
// CHECK-LABEL: func.func @tile_from_matmul_outs_inplace()
-// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[TENSOR_LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[TENSOR_RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[RESULT:.+]] = memref.subview %[[RETURN]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1]
@@ -616,12 +594,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)>
#map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)>
@@ -633,9 +609,9 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%4, %5}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%4, %5}
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -671,9 +647,9 @@
// CHECK: %[[DIM3:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index
// CHECK: %[[DIM4:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(4) : index
// CHECK: %[[DIM5:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(5) : index
-// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM0]], %[[DIM1]]}
-// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM2]], %[[DIM3]]}
-// CHECK: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM4]], %[[DIM5]]}
+// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM0]], %[[DIM1]]}
+// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM2]], %[[DIM3]]}
+// CHECK: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[DIM4]], %[[DIM5]]}
// CHECK-DAG: %[[WGSIZE_X:.+]] = hal.interface.workgroup.size[0]
// CHECK-DAG: %[[WGSIZE_Y:.+]] = hal.interface.workgroup.size[1]
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
@@ -691,39 +667,35 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reshape_simple() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
return
}
// CHECK-LABEL: func.func @reshape_simple()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]]
// CHECK: linalg.generic {{.*}} ins(%[[RESHAPE]] {{.*}} outs(%[[RET0]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
module {
func.func @reshape_fused_source() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>> -> tensor<3x4xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%4 = tensor.expand_shape %3 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
@@ -737,8 +709,8 @@
}
}
// CHECK-LABEL: func.func @reshape_fused_source()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32
@@ -746,19 +718,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @reshape_fused_source_and_copyout() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<12xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>> -> tensor<3x4xi32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12xi32>> -> tensor<12xi32>
%5 = tensor.expand_shape %4 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
%6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<3x4xi32>) outs(%2 : tensor<3x4xi32>) {
@@ -771,9 +741,9 @@
return
}
// CHECK-LABEL: func.func @reshape_fused_source_and_copyout()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[ARG0]] {{\[}}[0, 1]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[RESHAPE]] : memref<3x4xi32
@@ -782,16 +752,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @reshape_fused_target() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<12xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<12xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [12], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<12xi32>> -> tensor<12xi32>
%3 = tensor.expand_shape %2 [[0, 1]] output_shape [3, 4] : tensor<12xi32> into tensor<3x4xi32>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x4xi32>> -> tensor<3x4xi32>
@@ -805,8 +773,8 @@
return
}
// CHECK-LABEL: func.func @reshape_fused_target()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<12xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[RESHAPE:.+]] = memref.expand_shape %[[RET0]] {{\[}}[0, 1]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]] : memref<3x4xi32
@@ -814,12 +782,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0)[s0] -> (-d0 + 1, s0)>
#map1 = affine_map<(d0)[s0] -> (-d0 + 3, s0)>
@@ -827,9 +793,9 @@
%cst = arith.constant 0.000000e+00 : f32
%c3 = arith.constant 3 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x2xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x3xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x2xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x3xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 1, 2], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x2xf32>> -> tensor<1x1x2xf32>
%4 = tensor.collapse_shape %3 [[0, 1], [2]] : tensor<1x1x2xf32> into tensor<1x2xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
@@ -857,10 +823,10 @@
return
}
// CHECK-LABEL: func.func @dot_general_lowering()
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[RESHAPE_LHS:.+]] = memref.collapse_shape %[[LHS]]
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %[[IV0:.+]] = {{.+}} {
// CHECK: scf.for %[[IV1:.+]] = {{.+}} {
// CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[RESHAPE_LHS]][%[[IV0]], 0]
@@ -874,37 +840,33 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @slice() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
%7 = tensor.extract_slice %6[%0, %1] [%2, %3] [1, 1] : tensor<?x?xi32> to tensor<?x?xi32>
flow.dispatch.tensor.store %7, %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
return
}
// CHECK-LABEL: func.func @slice()
-// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SUBVIEW:.+]] = memref.subview %[[ARG]]
// CHECK: linalg.generic {{.*}} ins(%[[SUBVIEW]] {{.*}} outs(%[[RETURN]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @slice_rank_reducing() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -912,27 +874,25 @@
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%4, %4, %4}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%4, %4, %4}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [%4, %4, %4], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%4, %4, %4} -> tensor<?x?x?xi32>
%8 = tensor.extract_slice %7[%0, %0, %1] [%2, 1, %3] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?xi32>
flow.dispatch.tensor.store %8, %6, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
return
}
// CHECK-LABEL: func.func @slice_rank_reducing()
-// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SUBVIEW:.+]] = memref.subview %[[ARG]]
// CHECK: linalg.generic {{.*}} ins(%[[SUBVIEW]] {{.*}} outs(%[[RETURN]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 7, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 7, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @slice_multiple_copy() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -942,9 +902,9 @@
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
%6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%6, %6, %6}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xi32>>{%3, %4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%3, %5}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%6, %6, %6}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xi32>>{%3, %4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%3, %5}
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0], sizes = [%6, %6, %6], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xi32>>{%6, %6, %6} -> tensor<?x?x?xi32>
%11 = tensor.extract_slice %10[%0, %1, %2] [%3, %4, %5] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?x?xi32>
%12 = tensor.extract_slice %10[%0, %1, %2] [%3, 1, %5] [1, 1, 1] : tensor<?x?x?xi32> to tensor<?x?xi32>
@@ -953,9 +913,9 @@
return
}
// CHECK-LABEL: func.func @slice_multiple_copy()
-// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RETURN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RETURN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[SIZE1:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
// CHECK-DAG: %[[SIZE2:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
// CHECK-DAG: %[[SIZE3:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
@@ -966,15 +926,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @slice_in_place() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1}
%3 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
flow.dispatch.tensor.store %3, %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1}
return
@@ -984,39 +942,35 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @slice_whole_stride_dispatch_0() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
%7 = tensor.extract_slice %6[1, 0] [1, 4] [1, 1] : tensor<?x?xi32> to tensor<1x4xi32>
flow.dispatch.tensor.store %7, %5, offsets = [0, 0], sizes = [1, 4], strides = [1, 1] : tensor<1x4xi32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%2, %3}
return
}
// CHECK-LABEL: func.func @slice_whole_stride_dispatch_0()
-// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SUB_IN_FIXED:.+]] = memref.subview %[[INPUT]][1, 0] [1, 4] [1, 1]
// CHECK-DAG: %[[SUB_OUT_FIXED:.+]] = memref.subview %[[OUTPUT]][0, 0] [1, 4] [1, 1]
// CHECK: linalg.generic {{.*}} ins(%[[SUB_IN_FIXED]] {{.*}} outs(%[[SUB_OUT_FIXED]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @subtensor_insert() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -1025,9 +979,9 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%2, %3}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%4, %5}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%2, %3}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%4, %5}
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%2, %3} -> tensor<?x?xi32>
%11 = tensor.insert_slice %9 into %10[3, 4] [%0, %1] [1, 1] : tensor<?x?xi32> into tensor<?x?xi32>
@@ -1035,9 +989,9 @@
return
}
// CHECK-LABEL: func.func @subtensor_insert()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[D0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0) : index
// CHECK-DAG: %[[D1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1) : index
// CHECK-DAG: %[[D2:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
@@ -1052,15 +1006,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_extract() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<i32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x9xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<i32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<3x9xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 9], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<3x9xi32>> -> tensor<3x9xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i32>> -> tensor<i32>
%4 = tensor.extract %3[] : tensor<i32>
@@ -1069,8 +1021,8 @@
return
}
// CHECK-LABEL: func.func @tensor_extract()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[LOAD:.+]] = memref.load %[[ARG0]]
// CHECK: linalg.fill
// CHECK-SAME: ins(%[[LOAD]] :
@@ -1078,31 +1030,27 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @load_to_store() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3x4xi32>> -> tensor<3x4xi32>
flow.dispatch.tensor.store %2, %0, offsets = [0, 0], sizes = [3, 4], strides = [1, 1] : tensor<3x4xi32> -> !flow.dispatch.tensor<writeonly:tensor<3x4xi32>>
return
}
// CHECK-LABEL: func.func @load_to_store()
-// CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<3x4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: linalg.generic {{.*}} ins(%[[IN]] {{.*}} outs(%[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0)[s0] -> (-d0 + 5, s0)>
@@ -1111,8 +1059,8 @@
%cst_0 = arith.constant 0.000000e+00 : f32
%c5 = arith.constant 5 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x5x3x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<5x5xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x5x3x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<5x5xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 5, 3, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x5x3x1xf32>> -> tensor<1x5x3x1xf32>
%3 = tensor.collapse_shape %2 [[0, 1], [2, 3]] : tensor<1x5x3x1xf32> into tensor<5x3xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
@@ -1142,8 +1090,8 @@
// CHECK-LABEL: func.func @rhs_non_splat_constant
// CHECK-DAG: %[[CONSTANT:.+]] = arith.constant {{.+}} : tensor<3x5xf32>
// CHECK-DAG: %[[RHS:.+]] = bufferization.to_memref %[[CONSTANT]]
-// CHECK-DAG: %[[LHS_INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1x5x3x1xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<5x5xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[LHS_INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1x5x3x1xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RETURN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<5x5xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[LHS:.+]] = memref.collapse_shape %[[LHS_INPUT]]
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
@@ -1158,12 +1106,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0, d1) -> (d0)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -1173,9 +1119,9 @@
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xi32>>{%2}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%3, %4}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xi32>>{%2}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%3, %4}
%8 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%3, %4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%3, %4} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xi32>>{%2} -> tensor<?xi32>
@@ -1190,26 +1136,24 @@
return
}
// CHECK-LABEL: func.func @gather()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: linalg.generic
// CHECK: %[[VAL:.+]] = memref.load %[[ARG0]]
// CHECK: linalg.yield %[[VAL]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pooling_nhwc_sum() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<f32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x6x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<f32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x6x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x1xf32>>
%3 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 2, 2, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<1x2x2x1xf32>> -> tensor<1x2x2x1xf32>
%4 = bufferization.alloc_tensor() : tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
@@ -1222,9 +1166,9 @@
}
// CHECK-LABEL: func.func @pooling_nhwc_sum
// CHECK-DAG: %[[WINDOW:.+]] = memref.alloc() : memref<2x3xf32>
-// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<f32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<1x4x6x1xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<1x2x2x1xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[INIT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<f32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<1x4x6x1xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<1x2x2x1xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[INIT_VAL:.+]] = memref.load %[[INIT]][] : memref<f32{{.+}}>
// CHECK: linalg.fill
// CHECK-SAME: ins(%[[INIT_VAL]] :
@@ -1237,12 +1181,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)>
@@ -1255,10 +1197,10 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
%8 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %5}
%10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %5} -> tensor<?x?xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
@@ -1291,9 +1233,9 @@
return
}
// CHECK-LABEL: func.func @read_only_subtensor
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for
// CHECK: scf.for
// CHECK-DAG: %[[SV1:.+]] = memref.subview %[[ARG0]]
@@ -1305,19 +1247,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0) -> (d0)>
func.func @reshape_read_only() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%2}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%2}
%5 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%2} -> tensor<?xf32>
%6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%7 = tensor.collapse_shape %6 [[0, 1]] : tensor<?x?xf32> into tensor<?xf32>
@@ -1332,8 +1272,8 @@
return
}
// CHECK-LABEL: func.func @reshape_read_only
-// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[RESHAPE:.+]] = memref.collapse_shape %[[INPUT]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[RESHAPE]] : memref<?xf32
@@ -1341,22 +1281,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0, d1, d2, d3) -> (d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @use_buffer_for_operand_when_output_tensor_not_used() {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>> -> tensor<1x112x112x32xf32>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 255, 255, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>> -> tensor<1x225x225x16xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 16, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>> -> tensor<3x3x16x32xf32>
@@ -1374,7 +1312,7 @@
// CHECK: func.func @use_buffer_for_operand_when_output_tensor_not_used()
// CHECK-NOT: memref.alloc
-// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: linalg.fill
// CHECK-SAME: outs(%[[OUTPUT]] :
// CHECK-NEXT: linalg.conv_2d_nhwc_hwcf
@@ -1385,23 +1323,21 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d3)>
func.func @dont_use_buffer_for_operand_when_output_tensor_used() {
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>> -> tensor<1x112x112x32xf32>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x16xf32>> -> tensor<1x225x225x16xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 16, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x16x32xf32>> -> tensor<3x3x16x32xf32>
@@ -1421,7 +1357,7 @@
}
// CHECK-LABEL: func.func @dont_use_buffer_for_operand_when_output_tensor_used()
// CHECK-DAG: %[[ALLOC:.+]] = memref.alloc
-// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: linalg.fill
// CHECK-SAME: outs(%[[ALLOC]] :
// CHECK-NEXT: linalg.conv_2d_nhwc_hwcf
@@ -1434,11 +1370,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0) -> (-d0 + 4)>
#map1 = affine_map<(d0) -> (d0)>
@@ -1447,8 +1381,8 @@
%c-2147483648_i32 = arith.constant -2147483648 : i32
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<5xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<i32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<5xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<i32>>
%2 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<writeonly:tensor<i32>> -> tensor<i32>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [5], strides = [1] : !flow.dispatch.tensor<readonly:tensor<5xf32>> -> tensor<5xf32>
%4 = linalg.fill ins(%c-2147483648_i32 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
@@ -1469,20 +1403,18 @@
// CHECK-DAG: %[[CST1:.+]] = arith.constant -2147483648 : i32
// CHECK-DAG: %[[CST5:.+]] = arith.constant dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
// CHECK: %[[CAST5:.+]] = bufferization.to_memref %[[CST5]] : memref<5xi32>
-// CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<5xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<i32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<5xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<i32, #hal.descriptor_type<storage_buffer>>
// CHECK: linalg.fill ins(%[[CST1]] : i32) outs(%[[OUTPUT]] : memref<i32{{.+}}>)
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[INPUT]], %[[CAST5]] : {{.*}}) outs(%[[OUTPUT]] : memref<i32{{.+}}>)
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 32)>
func.func @cast_follwed_by_store() {
@@ -1491,9 +1423,9 @@
%c64 = arith.constant 64 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x32x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x32x64xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -1520,9 +1452,9 @@
}
// CHECK-LABEL: func.func @cast_follwed_by_store()
// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4x32x1024xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4x1024x64xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<4x32x64xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4x32x1024xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4x1024x64xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RESULT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<4x32x64xf32, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[LHSV:.+]] = memref.subview %[[LHS]]
// CHECK-DAG: %[[RHSV:.+]] = memref.subview %[[RHS]]
// CHECK-DAG: %[[RESULTV:.+]] = memref.subview %[[RESULT]]
@@ -1533,11 +1465,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @rank_reduced_subtensor_insert() {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
@@ -1545,8 +1475,8 @@
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<?x?x?xf32>>{%2, %3, %4}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<?x?x?xf32>>{%2, %3, %4}
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0], sizes = [%2, %3, %4], strides = [1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?x?xf32>>{%2, %3, %4} -> tensor<?x?x?xf32>
%9 = tensor.insert_slice %7 into %8[0, 0, 0] [1, %3, %4] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
@@ -1554,19 +1484,17 @@
return
}
// CHECK-LABEL: func.func @rank_reduced_subtensor_insert()
-// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[ARG:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[RET]]
// CHECK: linalg.generic {{.*}} ins(%[[ARG]] {{.*}} outs(%[[SUBVIEW]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -1577,9 +1505,9 @@
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x4xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<2x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x4xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<2x4xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [%c0, %c0], sizes = [2, 3], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:tensor<2x3xf32>> -> tensor<2x3xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [%c0, %c0], sizes = [3, 1], strides = [%c1, %c1] : !flow.dispatch.tensor<readonly:tensor<3x4xf32>> -> tensor<3x1xf32>
%6 = flow.dispatch.tensor.load %3, offsets = [%c0, %c0], sizes = [2, 1], strides = [%c1, %c1] : !flow.dispatch.tensor<readwrite:tensor<2x4xf32>> -> tensor<2x1xf32>
@@ -1607,9 +1535,9 @@
}
// CHECK-LABEL: func.func @bufferize_transfer_op_inplace()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]]
// CHECK-DAG: %[[RET0V:.+]] = memref.subview %[[RET0]]
// CHECK-COUNT-6: vector.transfer_read %[[ARG0]]
@@ -1621,13 +1549,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 10, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 10, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -1641,10 +1567,10 @@
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
%6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
%7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%6, %7}
%12 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index
%13 = hal.interface.constant.load layout(#pipeline_layout) ordinal(9) : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -1678,10 +1604,10 @@
return
}
// CHECK-LABEL: func.func @multi_result()
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK-DAG: %[[ARG0V:.+]] = memref.subview %[[ARG0]]
// CHECK-DAG: %[[ARG1V:.+]] = memref.subview %[[ARG1]]
// CHECK-DAG: %[[RET0V:.+]] = memref.subview %[[RET0]]
@@ -1692,13 +1618,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 128)>
#map1 = affine_map<(d0)[s0] -> (-d0 + s0, 128)>
@@ -1710,10 +1634,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%2}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%2}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%2}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%2}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%7 = affine.apply #map0()[%workgroup_id_x]
@@ -1743,10 +1667,10 @@
return
}
// CHECK-LABEL: func.func @multi_result_reduce
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[RET1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK: scf.for
// CHECK-DAG: %[[ARG0_SV:.+]] = memref.subview %[[ARG0]]
// CHECK-DAG: %[[ARG1_SV:.+]] = memref.subview %[[ARG1]]
@@ -1762,12 +1686,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<(d0) -> (-d0 + 250, 64)>
@@ -1784,9 +1706,9 @@
%c1 = arith.constant 1 : index
%c250 = arith.constant 250 : index
%c370 = arith.constant 370 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<250x144xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<144x370xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<250x370xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<250x144xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<144x370xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<250x370xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -1834,9 +1756,9 @@
// CHECK-DAG: %[[K:.+]] = arith.constant 144 : index
// CHECK-DAG: %[[L1_MN_SIZE:.+]] = arith.constant 32 : index
// CHECK-DAG: %[[L1_K_SIZE:.+]] = arith.constant 24 : index
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<250x144xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<144x370xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<250x370xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<250x144xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<144x370xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<250x370xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for %[[WORKGROUP_I:.+]] = %{{.*}} to %[[M]] step %{{.*}} {
// CHECK: scf.for %[[WORKGROUP_J:.+]] = %{{.*}} to %[[N]] step %{{.*}} {
// CHECK-DAG: %[[WORKGROUP_I_SIZE:.+]] = affine.min #{{.*}}(%[[WORKGROUP_I]])
@@ -1858,12 +1780,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<(d0) -> (-d0 + 250, 64)>
@@ -1881,9 +1801,9 @@
%c1 = arith.constant 1 : index
%c250 = arith.constant 250 : index
%c370 = arith.constant 370 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<250x144xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<144x370xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<250x370xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<250x144xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<144x370xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<250x370xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -1932,9 +1852,9 @@
// CHECK-DAG: %[[K:.+]] = arith.constant 144 : index
// CHECK-DAG: %[[L1_MN_SIZE:.+]] = arith.constant 32 : index
// CHECK-DAG: %[[L1_K_SIZE:.+]] = arith.constant 24 : index
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<250x144xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<144x370xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<250x370xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<250x144xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<144x370xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<250x370xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for %[[WORKGROUP_I:.+]] = %{{.*}} to %[[M]] step %{{.*}} {
// CHECK: scf.for %[[WORKGROUP_J:.+]] = %{{.*}} to %[[N]] step %{{.*}} {
// CHECK-DAG: %[[WORKGROUP_I_SIZE:.+]] = affine.min #{{.*}}(%[[WORKGROUP_I]])
@@ -1956,11 +1876,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0, s1] -> (s1 * s0)>
#map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)>
@@ -1972,8 +1890,8 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%2, %3}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%4, %5}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%2, %3}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%4, %5}
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -1998,8 +1916,8 @@
}
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK: func.func @tensor_insert_slice()
-// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[OFFSET_Y:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0)
// CHECK-DAG: %[[OFFSET_X:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1)
// CHECK: scf.for %[[IV0:.+]] =
@@ -2012,12 +1930,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<(d0)[s0] -> (-d0 + s0, 64)>
@@ -2026,9 +1942,9 @@
%c0_i32 = arith.constant 0 : i32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi32>>{%0}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<i32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%1, %0}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi32>>{%0}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<i32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%1, %0}
%5 = flow.dispatch.tensor.load %3, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i32>> -> tensor<i32>
%6 = tensor.extract %5[] : tensor<i32>
%7 = arith.cmpi slt, %6, %c0_i32 : i32
@@ -2049,8 +1965,8 @@
return
}
// CHECK-LABEL: func.func @dynamic_update_slice()
-// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<?x?xi32, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[OFFSET_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK: scf.for %[[IV0:.+]] =
@@ -2062,13 +1978,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0)[s0, s1] -> (-d0 + s1, s0)>
@@ -2083,10 +1997,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<f32>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<f32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -2146,10 +2060,10 @@
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[K]]}
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[K]], %[[N]]}
-// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<f32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[N]]}
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[K]]}
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[K]], %[[N]]}
+// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<f32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[N]]}
// CHECK: scf.for
// CHECK: scf.for
// CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]]
@@ -2174,13 +2088,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 4)>
#map1 = affine_map<(d0) -> (-d0 + 2, 4)>
@@ -2194,10 +2106,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<f32>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<f32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %5, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -2230,10 +2142,10 @@
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
-// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[K]]}
-// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[K]], %[[N]]}
-// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<f32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[N]]}
+// CHECK-DAG: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[K]]}
+// CHECK-DAG: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[K]], %[[N]]}
+// CHECK-DAG: %[[SCALAR:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<f32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>{%[[M]], %[[N]]}
// CHECK: scf.for
// CHECK: scf.for
// CHECK-DAG: %[[LHS_SUBVIEW1:.+]] = memref.subview %[[LHS]]
@@ -2250,11 +2162,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 4)>
#map1 = affine_map<()[s0] -> (s0 * 2)>
@@ -2270,9 +2180,9 @@
%cst_0 = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x6x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c64) : !flow.dispatch.tensor<readonly:tensor<2x1x2xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x6x2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x6x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c64) : !flow.dispatch.tensor<readonly:tensor<2x1x2xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x6x2xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -2312,26 +2222,24 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @no_op_subview() {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%5 = tensor.extract_slice %4[0, 0] [%0, %1] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
flow.dispatch.tensor.store %5, %3, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
return
}
// CHECK-LABEL: func.func @no_op_subview()
-// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SRC_DUP:.+]] = memref.subview %[[SRC]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[SRC_DUP]] :
@@ -2339,17 +2247,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @rank_reducing_no_op_subview() {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%0}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%0}
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0} -> tensor<1x?xf32>
%4 = tensor.extract_slice %3[0, 0] [1, %0] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
flow.dispatch.tensor.store %4, %2, offsets = [0], sizes = [%0], strides = [1] : tensor<?xf32> -> !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%0}
@@ -2357,8 +2263,8 @@
}
// CHECK-LABEL: func.func @rank_reducing_no_op_subview()
-// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[SRC:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[DEST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[SRC]][0, 0] [1, %{{.+}}]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[SUBVIEW]] :
@@ -2382,18 +2288,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @scan_1d_dim0_inclusive_sum() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<6xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<f32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<6xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<6xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<f32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<6xf32>>
%3 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<6xf32>> -> tensor<6xf32>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor<readonly:tensor<6xf32>> -> tensor<6xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:tensor<f32>> -> tensor<f32>
@@ -2414,14 +2318,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sort1D() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4xi32>>
%1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<4xi32>> -> tensor<4xi32>
%2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<4xi32>) {
^bb0(%arg0: i32, %arg1: i32):
@@ -2432,25 +2334,23 @@
return
}
// CHECK-LABEL: func.func @sort1D
-// CHECK: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: iree_linalg_ext.sort
// CHECK-SAME: outs(%[[BUF]] : memref<4xi32{{.+}}>)
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @scatter_update_scalar_1D() {
%c4 = arith.constant 4 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x1xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<8xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x1xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<8xi32>>
%3 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [8], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<8xi32>> -> tensor<8xi32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -2468,9 +2368,9 @@
return
}
// CHECK: func.func @scatter_update_scalar_1D
-// CHECK-DAG: %[[UPDATE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x1xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[ORIGINAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref<8xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[UPDATE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x1xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ORIGINAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref<8xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: scf.for %[[I:.+]] = %{{.+}} to %{{.+}} step %{{.+}}
// CHECK: iree_linalg_ext.scatter
// CHECK-SAME: ins(%[[UPDATE]], %[[INDICES]]
@@ -2478,17 +2378,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @topk() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<200x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<200x8xf32>>
%input_values = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [200, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<200x8xf32>> -> tensor<200x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<200x8xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<200x8xi32>>
%input_indices = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [200, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<200x8xi32>> -> tensor<200x8xi32>
%out_values = bufferization.alloc_tensor() : tensor<200x3xf32>
%out_indices = bufferization.alloc_tensor() : tensor<200x3xi32>
@@ -2503,8 +2401,8 @@
return
}
// CHECK: func.func @topk
-// CHECK-DAG: %[[INPUT_VALUES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<200x8xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[INPUT_INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<200x8xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[INPUT_VALUES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<200x8xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[INPUT_INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<200x8xi32, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[OUTPUT_VALUES:.+]] = memref.alloc() : memref<200x3xf32>
// CHECK-DAG: %[[OUTPUT_INDICES:.+]] = memref.alloc() : memref<200x3xi32>
// CHECK: iree_linalg_ext.topk
@@ -2513,17 +2411,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @iree_linalg_ext_pack() {
%c0 = arith.constant 0 : index
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>> -> tensor<2x2x3x3xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%4 = iree_linalg_ext.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : (tensor<4x4xi32> tensor<2x2x3x3xi32>) -> tensor<2x2x3x3xi32>
@@ -2532,24 +2428,22 @@
}
// CHECK: func.func @iree_linalg_ext_pack
// CHECK-DAG: %[[PAD:.+]] = arith.constant 0 : i32
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: iree_linalg_ext.pack %[[IN]]
// CHECK-SAME: padding_value(%[[PAD]] : i32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @iree_linalg_ext_unpack() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>> -> tensor<2x2x2x2xi32>
%4 = iree_linalg_ext.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : (tensor<2x2x2x2xi32> tensor<4x4xi32>) -> tensor<4x4xi32>
@@ -2557,24 +2451,22 @@
return
}
// CHECK: func.func @iree_linalg_ext_unpack
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: iree_linalg_ext.unpack %[[IN]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @iree_linalg_ext_unpack_fully_dynamic() {
%c0 = arith.constant 0 : index
%inner_d0 = util.unfoldable_constant 2 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>> -> tensor<2x2x?x?xi32>
%4 = iree_linalg_ext.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : (tensor<2x2x?x?xi32> tensor<4x4xi32>) -> tensor<4x4xi32>
@@ -2589,17 +2481,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_pack() {
%c0 = arith.constant 0 : index
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x4xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<2x2x3x3xi32>> -> tensor<2x2x3x3xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%4 = tensor.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32>
@@ -2608,24 +2498,22 @@
}
// CHECK: func.func @tensor_pack
// CHECK-DAG: %[[PAD:.+]] = arith.constant 0 : i32
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<2x2x3x3xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: iree_linalg_ext.pack %[[IN]]
// CHECK-SAME: padding_value(%[[PAD]] : i32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_unpack() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>> -> tensor<2x2x2x2xi32>
%4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> tensor<4x4xi32>
@@ -2633,24 +2521,22 @@
return
}
// CHECK: func.func @tensor_unpack
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) : memref<2x2x2x2xi32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<4x4xi32, #hal.descriptor_type<storage_buffer>>
// CHECK: iree_linalg_ext.unpack %[[IN]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_unpack_fully_dynamic() {
%c0 = arith.constant 0 : index
%inner_d0 = util.unfoldable_constant 2 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>>
%2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<4x4xi32>> -> tensor<4x4xi32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2x2x2xi32>> -> tensor<2x2x?x?xi32>
%4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32>
@@ -2665,20 +2551,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @reduction_ew() {
%c5120 = arith.constant 5120 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor<readonly:tensor<1001xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor<readonly:tensor<1x1001xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1001xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor<readonly:tensor<1001xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5120) : !flow.dispatch.tensor<readonly:tensor<1x1001xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1001xf32>>
%3 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1001], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<1x1001xf32>> -> tensor<1x1001xf32>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1001], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1001xf32>> -> tensor<1001xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1001], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1001xf32>> -> tensor<1x1001xf32>
@@ -2700,29 +2584,27 @@
}
// CHECK: func.func @reduction_ew
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c5120) : memref<1001xf32, strided<[1], offset: 1280>, #hal.descriptor_type<storage_buffer>>
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c5120) : memref<1x1001xf32, strided<[1001, 1], offset: 1280>, #hal.descriptor_type<storage_buffer>>
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) : memref<1x1001xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c5120) : memref<1001xf32, strided<[1], offset: 1280>, #hal.descriptor_type<storage_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c5120) : memref<1x1001xf32, strided<[1001, 1], offset: 1280>, #hal.descriptor_type<storage_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) : memref<1x1001xf32, #hal.descriptor_type<storage_buffer>>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>,
- #hal.descriptor_set.binding<1, uniform_buffer>,
- #hal.descriptor_set.binding<2, uniform_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<uniform_buffer>,
+ #hal.pipeline.binding<uniform_buffer>,
+ #hal.pipeline.binding<uniform_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @uniform_storage_buffer() {
%c0 = arith.constant 0 : index
%m = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%n = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%k = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
- %init = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
- %result = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %k}
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%k, %n}
+ %init = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%m, %n}
+ %result = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%m, %n}
%wg_id_y = hal.interface.workgroup.id[1] : index
%wg_count_y = hal.interface.workgroup.count[1] : index
%wg_size_y = hal.interface.workgroup.size[1] : index
@@ -2748,30 +2630,28 @@
}
// CHECK-LABEL: func.func @uniform_storage_buffer()
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(3) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<uniform_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @micro_kernel_op() {
%d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%s0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : f32
%s1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i64
- %arg0_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1}
- %arg1_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%d0, %d1}
- %arg2_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%d0, %d1}
- %arg3_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1}
+ %arg0_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1}
+ %arg1_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%d0, %d1}
+ %arg2_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%d0, %d1}
+ %arg3_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1}
%arg0 = flow.dispatch.tensor.load %arg0_binding, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%d0, %d1} -> tensor<?x?xf32>
%arg1 = flow.dispatch.tensor.load %arg1_binding, offsets = [0, 0], sizes = [%d0, %d1], strides = [1, 1]
@@ -2792,10 +2672,10 @@
// CHECK-LABEL: func @micro_kernel_op()
// CHECK-DAG: %[[S0:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
// CHECK-DAG: %[[S1:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(3)
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[ARG3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<?x?xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[ARG3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) : memref<?x?xf32, #hal.descriptor_type<uniform_buffer>>
// CHECK: iree_codegen.ukernel.generic "foo"
// CHECK-SAME: ins(%[[ARG0]] :
// CHECK-SAME: outs(%[[ARG1]], %[[ARG2]] :
@@ -2804,17 +2684,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sub_byte_bufferize_with_offset() {
%c64 = arith.constant 64 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%2 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x]
%3 = flow.dispatch.tensor.load %1, offsets = [%2], sizes = [64], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<64xf32>> -> tensor<64xf32>
@@ -2830,7 +2708,7 @@
}
// CHECK-LABEL: func.func @sub_byte_bufferize_with_offset()
// CHECK: %[[C64:.+]] = arith.constant 64 : index
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: memref<64xi4, strided<[1], offset: 128>
// -----
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
index ae71018..5b47372 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_expand_strided_metadata.mlir
@@ -64,14 +64,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_zero_offset_memref() -> (memref<f32>, index, index, index, index, index) {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x384xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x384xf32>
%base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref<512x384xf32> -> memref<f32>, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref<f32>, index, index, index, index, index
}
@@ -80,19 +78,17 @@
// CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<196608xf32>
+// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<196608xf32>
// CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: []
// CHECK: return %[[BASE_PTR]], %[[C0]], %[[C512]], %[[C384]], %[[C384]], %[[C1]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_offset_index_memref(%arg0 : index) -> (memref<index>, index, index, index, index, index) {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex, strided<[384, 1], offset:?>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex, strided<[384, 1], offset:?>>
%base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref<512x384xindex, strided<[384, 1], offset:?>> -> memref<index>, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref<index>, index, index, index, index, index
}
@@ -106,20 +102,18 @@
// CHECK: %[[SIZEOF:.+]] = util.sizeof index
// CHECK: %[[OFFSET:.+]] = affine.apply #[[MAP0]]()[%arg0, %[[SIZEOF]]]
// CHECK: %[[SUBSPAN_SIZE:.+]] = affine.apply #[[MAP1]]()[%arg0, %[[SIZEOF]]]
-// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<?xindex>{%[[SUBSPAN_SIZE]]}
+// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<?xindex>{%[[SUBSPAN_SIZE]]}
// CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: []
// CHECK: return %[[BASE_PTR]], %[[OFFSET]], %[[C512]], %[[C384]], %[[C384]], %[[C1]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_dyn_dims_memref(%arg0 : index, %arg1 : index) -> (memref<index>, index, index, index, index, index) {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<?x?xindex>{%arg0, %arg1}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<?x?xindex>{%arg0, %arg1}
%base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %0 : memref<?x?xindex> -> memref<index>, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : memref<index>, index, index, index, index, index
}
@@ -128,7 +122,7 @@
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%arg0, %arg1]
-// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<?xindex>{%[[SIZE]]}
+// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<?xindex>{%[[SIZE]]}
// CHECK: %[[BASE_PTR:.+]] = memref.reinterpret_cast %[[BINDING]] to offset: [0], sizes: [], strides: []
// CHECK: return %[[BASE_PTR]], %[[C0]], %arg0, %arg1, %arg1, %[[C1]]
@@ -186,15 +180,13 @@
// Tests for the part of the pass that converts iree_codegen to memref.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @external_func_entry_point() -> (memref<bf16>, index) {
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) flags(ReadOnly) : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>>
%base_buffer, %offset, %sizes:3, %strides:3 = iree_codegen.extract_strided_metadata %2 : memref<1x8x768xbf16, strided<[6144, 768, 1], offset: ?>> -> memref<bf16>, index, index, index, index, index, index, index
return %base_buffer, %offset : memref<bf16>, index
}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir b/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir
index 83c7dc7..3ca8c27 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_into_nop.mlir
@@ -175,35 +175,31 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<1x1xf32>, matmul_narrow_M = 1 : index, matmul_narrow_N = 1 : index, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>
func.func @drop_encoding_for_hal_flow_ops_static() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xf32, #encoding_lhs>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xf32, #encoding_lhs>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1xf32>> -> tensor<1x1xf32>
%3 = iree_encoding.set_encoding %2 : tensor<1x1xf32> -> tensor<1x1xf32, #encoding_lhs>
flow.dispatch.tensor.store %3, %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : tensor<1x1xf32, #encoding_lhs> -> !flow.dispatch.tensor<writeonly:tensor<1x1xf32, #encoding_lhs>>
return
}
// CHECK-LABEL: func.func @drop_encoding_for_hal_flow_ops_static
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : !flow.dispatch.tensor<writeonly:tensor<1x1xf32>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : !flow.dispatch.tensor<writeonly:tensor<1x1xf32>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK: flow.dispatch.tensor.store %[[LOAD]], %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [bf16, bf16, bf16], original_type = tensor<?x?xbf16>, user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>]>
func.func @drop_encoding_for_hal_flow_ops_dynamic() {
@@ -225,15 +221,15 @@
%13 = arith.index_castui %12 : i64 to index
%14 = flow.dispatch.workload.ordinal %8, 0 : index
%15 = flow.dispatch.workload.ordinal %13, 1 : index
- %16 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xbf16>>{%14, %15}
- %17 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xbf16, #encoding_lhs>>{%14, %15}
+ %16 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xbf16>>{%14, %15}
+ %17 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xbf16, #encoding_lhs>>{%14, %15}
%18 = flow.dispatch.tensor.load %16, offsets = [0, 0], sizes = [%14, %15], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xbf16>>{%14, %15} -> tensor<?x?xbf16>
%19 = iree_encoding.set_encoding %18 : tensor<?x?xbf16> -> tensor<?x?xbf16, #encoding_lhs>
flow.dispatch.tensor.store %19, %17, offsets = [0, 0], sizes = [%14, %15], strides = [1, 1] : tensor<?x?xbf16, #encoding_lhs> -> !flow.dispatch.tensor<writeonly:tensor<?x?xbf16, #encoding_lhs>>{%14, %15}
return
}
// CHECK-LABEL: func.func @drop_encoding_for_hal_flow_ops_dynamic
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : !flow.dispatch.tensor<readonly:tensor<?x?xbf16>>
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : !flow.dispatch.tensor<writeonly:tensor<?x?xbf16>>
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : !flow.dispatch.tensor<readonly:tensor<?x?xbf16>>
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : !flow.dispatch.tensor<writeonly:tensor<?x?xbf16>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK: flow.dispatch.tensor.store %[[LOAD]], %[[OUT]]
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir b/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir
index 8e80747..6bc67b8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/materialize_user_configs.mlir
@@ -3,20 +3,18 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [32, 32, 0], [0, 0, 32], [0, 0, 0]]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {target_triple = "x86_64-xyz-xyz"}>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
module {
func.func @preset_config() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x512xf32>> -> tensor<256x512xf32>
%5 = tensor.empty() : tensor<128x512xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
index 1255453..5b072cb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
@@ -172,12 +172,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -192,7 +190,7 @@
%c1 = arith.constant 1 : index
%cst_0 = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1280xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1280xf16>>
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%1 = affine.apply #map()[%workgroup_id_y]
@@ -229,19 +227,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @_batch_matmul_narrow_n_2_dispatch_4_unpack_i32() attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
%c0_i32 = arith.constant 0 : i32
%c2 = arith.constant 2 : index
%c128 = arith.constant 128 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1x1x2x8xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x3x2xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1x1x2x8xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x3x2xi32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
scf.for %arg0 = %workgroup_id_x to %c2 step %workgroup_count_x {
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
index 50a28e3..18c1677 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir
@@ -1,6 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-codegen-reconcile-translation-info)))" %s --verify-diagnostics | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @err_multiple_entry_point {
// expected-error @+1 {{reconciliation for multiple export ops unsupported}}
hal.executable.variant public @reconcile_workgroup_size target(#hal.executable.target<"", "", {}>) {
@@ -11,7 +13,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @reconcile_workgroup_size {
hal.executable.variant public @reconcile_workgroup_size target(#hal.executable.target<"", "", {}>) {
hal.executable.export public @entry_point layout(#pipeline_layout)
@@ -31,7 +35,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @single_translation_info {
hal.executable.variant public @single_translation_info target(#hal.executable.target<"", "", {}>) {
hal.executable.export public @entry_point layout(#pipeline_layout)
@@ -51,7 +57,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @err_mistmatched_workgroup_size {
hal.executable.variant public @err_mismatched_workgroup_size target(#hal.executable.target<"", "", {}>) {
// expected-error @+1 {{failed to reconcile workgroup sizes}}
@@ -69,7 +77,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @err_mistmatched_workgroup_size2 {
hal.executable.variant public @err_mismatched_workgroup_size2 target(#hal.executable.target<"", "", {}>) {
// expected-error @+1 {{failed to reconcile workgroup sizes}}
@@ -87,7 +97,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @reconcile_subgroup_size {
hal.executable.variant public @reconcile_subgroup_size target(#hal.executable.target<"", "", {}>) {
hal.executable.export public @entry_point layout(#pipeline_layout)
@@ -107,7 +119,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @err_reconcile_subgroup_size {
hal.executable.variant public @err_reconcile_subgroup_size target(#hal.executable.target<"", "", {}>) {
hal.executable.export public @entry_point layout(#pipeline_layout)
@@ -127,7 +141,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @llvm_func_attrs {
hal.executable.variant public @llvm_func_attrs target(#hal.executable.target<"", "", {}>) {
hal.executable.export public @entry_point layout(#pipeline_layout)
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir b/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir
index 6607225..0ec9ceb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/remove_dead_allocs.mlir
@@ -19,13 +19,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @cleanup_only_assume_alignment_uses() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<42xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<42xf32>
memref.assume_alignment %0, 64 : memref<42xf32>
return
}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir
index 4fa8f0a..dc7a356 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/remove_trivial_loops.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-remove-single-iteration-loop)))))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [64, 1, 1]>
// CHECK-LABEL: func.func @dispatch_0()
@@ -48,11 +46,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @workgroup_tile_loop()
@@ -85,11 +81,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @workgroup_tile_loop_negative()
@@ -122,11 +116,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @both_workgroup_and_workitem()
@@ -187,7 +179,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#map0 = affine_map<()[s0] -> (s0 ceildiv 4)>
#map1 = affine_map<()[s0] -> (s0 * 4)>
@@ -206,11 +202,11 @@
%cst = arith.constant 0.000000e+00 : f32
%c4 = arith.constant 4 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4xf32>
memref.assume_alignment %0, 64 : memref<4xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<4xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<4xf32>
memref.assume_alignment %1, 64 : memref<4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<4xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<4xf32>
memref.assume_alignment %2, 64 : memref<4xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
index 89ac360..408885d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir
@@ -2,13 +2,11 @@
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups{max-workgroup-parallel-dims=1}, canonicalize)), cse)))' --split-input-file %s | FileCheck %s -check-prefix=CHECKW
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups{distribution-method=2})), canonicalize, cse)))' --split-input-file %s | FileCheck %s -check-prefix=NO-LOOP
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [16, 4, 0], [0, 0, 64]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -31,13 +29,13 @@
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
@@ -72,10 +70,10 @@
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(0)
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(1)
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(2)
-// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[INIT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[INIT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK-DAG: %[[WG_ID_X:.+]] = hal.interface.workgroup.id[0]
// CHECK-DAG: %[[WG_COUNT_X:.+]] = hal.interface.workgroup.count[0]
// CHECK-DAG: %[[WG_ID_Y:.+]] = hal.interface.workgroup.id[1]
@@ -100,12 +98,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [1, 4], [0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -128,11 +124,11 @@
%cl_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -175,12 +171,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 64, 64, 64], [1, 1, 1, 4], [0, 0, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -205,11 +199,11 @@
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
%3 = flow.dispatch.workload.ordinal %cl_3, 3 : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32)
: !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
@@ -254,12 +248,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[2, 64, 64, 64], [1, 1, 1, 4], [0, 0, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -284,11 +276,11 @@
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
%3 = flow.dispatch.workload.ordinal %cl_3, 3 : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32)
: !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
@@ -337,9 +329,9 @@
// CHECK-DAG: %[[D1:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(1) : index
// CHECK-DAG: %[[D2:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(2) : index
// CHECK-DAG: %[[D3:.*]] = hal.interface.constant.load layout({{.+}}) ordinal(3) : index
-// CHECK-DAG: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
-// CHECK-DAG: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
-// CHECK-DAG: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
+// CHECK-DAG: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
+// CHECK-DAG: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
+// CHECK-DAG: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%[[D0]], %[[D1]], %[[D2]], %[[D3]]}
// CHECK: %[[WORKGROUP_ID_X:.*]] = hal.interface.workgroup.id[0] : index
// CHECK: %[[WORKGROUP_COUNT_X:.*]] = hal.interface.workgroup.count[0] : index
// CHECK: %[[WORKGROUP_ID_Y:.*]] = hal.interface.workgroup.id[1] : index
@@ -374,12 +366,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[2, 64, 0, 64], [1, 1, 1, 4], [0, 0, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -404,11 +394,11 @@
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
%3 = flow.dispatch.workload.ordinal %cl_3, 3 : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32)
: !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
@@ -449,12 +439,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 64, 0], [1, 16, 4, 0], [0, 0, 0, 64]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -479,11 +467,11 @@
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
%3 = flow.dispatch.workload.ordinal %cl_3, 3 : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %1, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32)
: !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %3, %2}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32)
: !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%0, %1, %2}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [%0, %1, %3], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %1, %3} -> tensor<?x?x?xf32>
@@ -523,12 +511,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 16, 0], [16, 8, 0], [0, 0, 2]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
@@ -542,11 +528,11 @@
builtin.module {
func.func @preset_config() attributes {translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
@@ -587,11 +573,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 10, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 10, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUBufferOpsTileAndVectorize>
@@ -624,8 +608,8 @@
%dest_offset_x = flow.dispatch.workload.ordinal %cl_7, 7: index
%slice_size_y = flow.dispatch.workload.ordinal %cl_8, 8: index
%slice_size_x = flow.dispatch.workload.ordinal %cl_9, 9: index
- %source = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xi32>{%source_size_y, %source_size_x}
- %dest = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xi32>{%dest_size_y, %dest_size_x}
+ %source = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xi32>{%source_size_y, %source_size_x}
+ %dest = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xi32>{%dest_size_y, %dest_size_x}
%source_subview = memref.subview %source[%source_offset_y, %source_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
%dest_subview = memref.subview %dest[%dest_offset_y, %dest_offset_x] [%slice_size_y, %slice_size_x] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset : ?>>
linalg.generic {
@@ -663,8 +647,8 @@
// CHECK-DAG: %[[DEST_OFFSET_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(7) : index
// CHECK-DAG: %[[SLICE_SIZE_Y:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(8) : index
// CHECK-DAG: %[[SLICE_SIZE_X:.+]] = hal.interface.constant.load layout({{.+}}) ordinal(9) : index
-// CHECK-DAG: %[[SOURCE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[DEST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[SOURCE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[DEST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[SOURCE:.+]] = memref.subview %[[SOURCE_BINDING]][%[[SOURCE_OFFSET_Y]], %[[SOURCE_OFFSET_X]]]
// CHECK-DAG: %[[DEST:.+]] = memref.subview %[[DEST_BINDING]][%[[DEST_OFFSET_Y]], %[[DEST_OFFSET_X]]]
// CHECK-DAG: %[[WG_ID_X:.+]] = hal.interface.workgroup.id[0]
@@ -688,11 +672,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[15]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUDefault>
@@ -708,9 +690,9 @@
%c2 = arith.constant 2 : index
%cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32>
%cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readwrite:tensor<32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readwrite:tensor<32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1]
: !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
@@ -743,11 +725,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUDefault>
@@ -765,8 +745,8 @@
%cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32>
%0 = bufferization.to_memref %cst_0 : memref<4xf32>
%1 = bufferization.to_memref %cst : memref<4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32>
iree_linalg_ext.fft {lowering_config = #config}
ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>)
return
@@ -794,12 +774,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [1, 4, 0], [0, 0, 4]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#map0 = affine_map<(d0, d1) -> (d0, d1)>
@@ -823,11 +801,11 @@
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%6 = tensor.empty(%0, %1) : tensor<?x?xf32>
%7 = linalg.generic {
@@ -879,12 +857,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 64, 64, 64, 0, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -918,11 +894,11 @@
%6 = flow.dispatch.workload.ordinal %cl_6, 6 : index
%7 = flow.dispatch.workload.ordinal %cl_7, 7 : index
%8 = flow.dispatch.workload.ordinal %cl_8, 8 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%4, %5, %3, %6}
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xf32>>{%0, %7, %8, %6}
%12 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
@@ -969,12 +945,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 20, 40, 48, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -991,11 +965,11 @@
builtin.module {
func.func @conv_static() attributes {translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<1x161x161x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<1x80x80x96xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 96], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<1x161x161x96xf32>> -> tensor<1x161x161x96xf32>
@@ -1043,11 +1017,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[16, 32], [16, 16], [0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1065,9 +1037,9 @@
}
builtin.module {
func.func @generic_static() attributes {translation_info = #translation} {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<96x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<writeonly:tensor<16x96xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [96, 16], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<96x16xf32>> -> tensor<96x16xf32>
@@ -1110,12 +1082,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[28, 8, 0], [4, 4, 0], [0, 0, 60]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {
data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
@@ -1132,11 +1102,11 @@
builtin.module {
func.func @matmul_static() attributes {translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<196x240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<240x40xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<196x40xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 240], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<196x240xf32>> -> tensor<196x240xf32>
@@ -1165,12 +1135,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 7, 64, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {
data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
@@ -1187,11 +1155,11 @@
builtin.module {
func.func @restrict_num_workgroups() attributes {translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<1x11x11x576xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<5x5x576xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<1x7x7x576xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<1x11x11x576xf32>> -> tensor<1x11x11x576xf32>
@@ -1219,12 +1187,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[4, 0, 0], [4, 0, 0], [0, 1, 4]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1292,12 +1258,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 0, 0], [8, 0, 0], [0, 0, 16]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1318,11 +1282,11 @@
%cl_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<?x1xf32>>{%0}
%5 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%1, 1], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%1} -> tensor<?x1xf32>
@@ -1364,12 +1328,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0], [0, 0, 0], [0, 0, 16]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1387,11 +1349,11 @@
func.func @gemm_unit_M_unit_N() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%0}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0} -> tensor<1x?xf32>
@@ -1423,11 +1385,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0, 0, 64, 64, 0, 64], [0, 1, 0, 0, 1, 1, 0, 4], [0, 0, 0, 0, 0, 0, 0, 0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1452,9 +1412,9 @@
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
%3 = flow.dispatch.workload.ordinal %cl_3, 3 : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<writeonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [1, %0, 1, 1, %1, %2, 1, %3], strides = [1, 1, 1, 1, 1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3} -> tensor<1x?x1x1x?x?x1x?xf32>
@@ -1497,11 +1457,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[0], [0], [4]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1521,9 +1479,9 @@
func.func @reduce_to_scalar() attributes {translation_info = #translation} {
%cl_0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readwrite:tensor<f32>>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [%0], strides = [1]
: !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0} -> tensor<?xf32>
@@ -1557,11 +1515,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1578,9 +1534,9 @@
}
builtin.module {
func.func @scalar() attributes {translation_info = #translation} {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<f32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = []
: !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
@@ -1614,11 +1570,9 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[2], [2], [0]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1635,9 +1589,9 @@
}
builtin.module {
func.func @rank_reduced_slice() attributes {translation_info = #translation} {
- %in_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %in_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<5x40xf32>>
- %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %out_binding = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<writeonly:tensor<10xf32>>
%in = flow.dispatch.tensor.load %in_binding, offsets = [3, 10], sizes = [1, 10], strides = [2, 1]
: !flow.dispatch.tensor<readonly:tensor<5x40xf32>> -> tensor<10xf32>
@@ -1667,9 +1621,9 @@
// CHECK: hal.return %[[C5]], %[[C1]], %[[C1]]
// CHECK: func.func @rank_reduced_slice()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
-// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: : !flow.dispatch.tensor<readonly:tensor<5x40xf32>>
-// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[DST_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: : !flow.dispatch.tensor<writeonly:tensor<10xf32>>
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: %[[OFFSET:.+]] = affine.apply #[[MAP]]()[%[[IV0]]]
@@ -1681,13 +1635,11 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [{sizes=[32, 64, 0], interchange=[1, 0, 2]}, [8, 32, 0], [0, 0, 16]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -1709,13 +1661,13 @@
%0 = flow.dispatch.workload.ordinal %cl_0, 0 : index
%1 = flow.dispatch.workload.ordinal %cl_1, 1 : index
%2 = flow.dispatch.workload.ordinal %cl_2, 2 : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
@@ -1753,11 +1705,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @no_compute {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
@@ -1784,9 +1734,9 @@
%7 = flow.dispatch.workload.ordinal %2, 2 : index
%8 = flow.dispatch.workload.ordinal %3, 3 : index
%9 = flow.dispatch.workload.ordinal %4, 4 : index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<?x?x?xf32>{%5, %6, %7}
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<?x?x?xf32>{%5, %6, %7}
memref.assume_alignment %10, 64 : memref<?x?x?xf32>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1x?x?xf32>{%8, %9}
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1x?x?xf32>{%8, %9}
memref.assume_alignment %11, 64 : memref<1x?x?xf32>
return
}
@@ -1800,13 +1750,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @tile_multiuse_producer {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf_x86_64", {}>) {
@@ -1820,13 +1768,13 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<12x128x128xf32>>
- %s0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %s0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<12x128x128xf32>>
- %s1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %s1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<12x128xf32>>
- %s2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0)
+ %s2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<12x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 128], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<12x128x128xf32>> -> tensor<12x128x128xf32>
@@ -1877,10 +1825,10 @@
}
}
// CHECK-LABEL: func @tile_multiuse_producer()
-// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
-// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
-// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
-// CHECK-DAG: %[[RESULT_BINDING2:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3)
+// CHECK-DAG: %[[SRC_BINDING:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
+// CHECK-DAG: %[[RESULT_BINDING0:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
+// CHECK-DAG: %[[RESULT_BINDING1:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
+// CHECK-DAG: %[[RESULT_BINDING2:.+]] = hal.interface.binding.subspan layout(#pipeline_layout) binding(3)
// CHECK: scf.for %[[IV0:.+]] =
// CHECK: scf.for %[[IV1:.+]] =
// CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[SRC_BINDING]], offsets = [%[[IV0]], %[[IV1]], 0]
@@ -1902,13 +1850,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @no_tile {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
@@ -1921,10 +1867,10 @@
func.func @no_tile() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<3xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c64) : !flow.dispatch.tensor<readwrite:tensor<3xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<3xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c64) : !flow.dispatch.tensor<readwrite:tensor<3xi32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor<readonly:tensor<10xf32>> -> tensor<10xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor<readonly:tensor<10xi32>> -> tensor<10xi32>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [3], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<3xf32>> -> tensor<3xf32>
@@ -1947,11 +1893,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @pack_lowering {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
@@ -1964,9 +1908,9 @@
func.func @gemm_lhs_pack() attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<100x250xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<14x64x8x4xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [100, 250], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<100x250xf32>> -> tensor<100x250xf32>
@@ -1990,11 +1934,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @pack_lowering {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
@@ -2008,9 +1950,9 @@
%c0 = arith.constant 0 : index
%c114688 = arith.constant 114688 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c114688)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c114688)
: !flow.dispatch.tensor<writeonly:tensor<64x64x8x4xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<250x500xf32>> -> tensor<250x500xf32>
@@ -2033,18 +1975,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @clone_index_computations {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
- hal.executable.export public @clone_index_computations ordinal(0) layout(
- #hal.pipeline.layout<push_constants = 4, sets = [
- <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
- {
+ hal.executable.export public @clone_index_computations ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3 : index, %arg4 : index):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
hal.return %x, %y, %z : index, index, index
@@ -2065,11 +2002,11 @@
%5 = flow.dispatch.workload.ordinal %1, 1 : index
%6 = flow.dispatch.workload.ordinal %2, 2 : index
%7 = flow.dispatch.workload.ordinal %3, 3 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %5}
%9 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%6]
%10 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%7]
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<?x?x8x4xf32>>{%9, %10}
%12 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %5} -> tensor<?x?xf32>
@@ -2102,11 +2039,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dynamic_unpack {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
@@ -2131,8 +2068,8 @@
%5 = flow.dispatch.workload.ordinal %1, 1 : index
%6 = flow.dispatch.workload.ordinal %2, 2 : index
%7 = flow.dispatch.workload.ordinal %3, 3 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5} -> tensor<?x?x32x16xi32>
%11 = tensor.empty(%6, %7) : tensor<?x?xi32>
%12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11
@@ -2151,18 +2088,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dynamic_unpack_dynamic_tile {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {}>) {
- hal.executable.export public @dynamic_unpack_dynamic_tile ordinal(0) layout(
- #hal.pipeline.layout<push_constants = 4, sets = [
- <0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
- {
+ hal.executable.export public @dynamic_unpack_dynamic_tile ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3, %arg4
hal.return %x, %y, %z : index, index, index
@@ -2185,8 +2119,8 @@
%5 = flow.dispatch.workload.ordinal %1, 1 : index
%6 = flow.dispatch.workload.ordinal %2, 2 : index
%7 = flow.dispatch.workload.ordinal %3, 3 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%4, %5, %c32, %c16}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%4, %5, %c32, %c16}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, %c32, %c16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%4, %5, %c32, %c16} -> tensor<?x?x?x?xi32>
%11 = tensor.empty(%6, %7) : tensor<?x?xi32>
%12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%c32, %c16] into %11
@@ -2205,11 +2139,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @unpack_elem {
hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {}>) {
@@ -2221,8 +2153,8 @@
builtin.module {
func.func @unpack_elem() attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x48x8x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x48x8x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [16, 48, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x48x8x8xf32>> -> tensor<16x48x8x8xf32>
%3 = tensor.empty() : tensor<128x384xf32>
%4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64]]>} : tensor<16x48x8x8xf32> -> tensor<128x384xf32>
@@ -2246,11 +2178,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
@@ -2277,10 +2207,10 @@
%0:2 = iree_codegen.query_tile_sizes tensor<12544x16xi32, #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%1 = affine.apply affine_map<()[s0] -> (12544 ceildiv s0)>()[%0#0]
%2 = affine.apply affine_map<()[s0] -> (16 ceildiv s0)>()[%0#1]
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c200960) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%1, %2, %0#0, %0#1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1003776) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<12544xi32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1053952) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16xi32>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12544x16xi32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c200960) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%1, %2, %0#0, %0#1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1003776) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<12544xi32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1053952) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16xi32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12544x16xi32>>
%10 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [%1, %2, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%1, %2, %0#0, %0#1} -> tensor<?x?x?x?xi32>
%11 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [12544], strides = [1] : !flow.dispatch.tensor<readonly:tensor<12544xi32>> -> tensor<12544xi32>
%12 = flow.dispatch.tensor.load %5, offsets = [0], sizes = [16], strides = [1] : !flow.dispatch.tensor<readonly:tensor<16xi32>> -> tensor<16xi32>
@@ -2311,15 +2241,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>,
- #hal.descriptor_set.binding<5, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @elem_pack {
hal.executable.variant public @embedded_elf_arm_64 target(<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>) {
@@ -2338,15 +2266,15 @@
%c1572864 = arith.constant 1572864 : index
%c2359296 = arith.constant 2359296 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1339392) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x2x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c786432) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xi32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c823296) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c825344) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<48x512x8x1xf32>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c1572864) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) alignment(64) offset(%c2359296) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1339392) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x2x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c786432) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xi32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c823296) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c825344) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<48x512x8x1xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c1572864) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) alignment(64) offset(%c2359296) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
%9 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 2, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x2x512xf32>> -> tensor<1x2x512xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%11 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
@@ -2385,16 +2313,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @scatter {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export public @scatter ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
+ layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+ ]>)
{
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
@@ -2406,9 +2335,9 @@
%c251668480 = arith.constant 251668480 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c228075520) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5898240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c251668480) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5898240x4xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x640x48x48xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c228075520) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5898240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c251668480) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5898240x4xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x640x48x48xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [5898240], strides = [1] : !flow.dispatch.tensor<readonly:tensor<5898240xf32>> -> tensor<5898240xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [5898240, 4], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<5898240x4xi32>> -> tensor<5898240x4xi32>
%5 = tensor.empty() : tensor<1x640x48x48xf32>
@@ -2429,11 +2358,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @collapse_workgroups_dispatch_dispatch_0 {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -2445,8 +2372,8 @@
builtin.module {
func.func @collapse_workgroups_dispatch_dispatch_0_generic_1024x128x16x64() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x16x128x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x128x16x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x16x128x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x128x16x64xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1024, 16, 128, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x16x128x64xf32>> -> tensor<1024x16x128x64xf32>
%3 = tensor.empty() : tensor<1024x128x16x64xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<1024x16x128x64xf32>) outs(%3 : tensor<1024x128x16x64xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 1, 64]]>} {
@@ -2472,13 +2399,11 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [16, 4, 0], [0, 0, 64]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {
data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
@@ -2498,13 +2423,13 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3)
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3)
: !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
@@ -2539,13 +2464,11 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [16, 4, 0], [0, 0, 64]]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
#map = affine_map<()[s0] -> (s0 ceildiv 64)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
module {
@@ -2563,10 +2486,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -2584,9 +2507,9 @@
}
// CHECK-LABEL: func.func @matmul_already_distributed
-// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// CHECK-NOT: scf.for
// CHECK: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [%workgroup_id_y, 0]
// CHECK: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]], offsets = [0, %workgroup_id_x]
@@ -2597,11 +2520,9 @@
// Check that the distribution avoids distributing unit-trip count loops.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @avoid_unit_range_distribute {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -2638,9 +2559,9 @@
%20 = arith.index_castui %19 : i64 to index
%21 = flow.dispatch.workload.ordinal %15, 0 : index
%22 = flow.dispatch.workload.ordinal %20, 1 : index
- %23 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x?x16x16xf16>>{%21, %22}
- %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x8x16x16xf16>>{%22}
- %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?x16x8x16xf16>>{%22}
+ %23 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x?x16x16xf16>>{%21, %22}
+ %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x8x16x16xf16>>{%22}
+ %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?x16x8x16xf16>>{%22}
%26 = flow.dispatch.tensor.load %23, offsets = [0, 0, 0, 0, 0], sizes = [32, %21, %22, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?x?x16x16xf16>>{%21, %22} -> tensor<32x?x?x16x16xf16>
%27 = flow.dispatch.tensor.load %24, offsets = [0, 0, 0, 0, 0], sizes = [32, %22, 8, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?x8x16x16xf16>>{%22} -> tensor<32x?x8x16x16xf16>
%28 = tensor.empty(%22) : tensor<32x?x16x8x16xf16>
@@ -2674,12 +2595,10 @@
// Check that the distribution avoids distributing unit-trip count loops.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @set_size_to_tilesize_when_divisible {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -2715,10 +2634,10 @@
%19 = arith.ori %16, %18 : i64
%20 = arith.index_castui %19 : i64 to index
%21 = flow.dispatch.workload.ordinal %20, 1 : index
- %22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xf16>>
+ %22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xf16>>
%23 = flow.dispatch.workload.ordinal %21, 2 : index
- %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x16x32x128xf16>>{%21}
- %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%10) : !flow.dispatch.tensor<writeonly:tensor<?x16x4096xf16>>{%23}
+ %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x16x32x128xf16>>{%21}
+ %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%10) : !flow.dispatch.tensor<writeonly:tensor<?x16x4096xf16>>{%23}
%26 = flow.dispatch.workload.ordinal %15, 0 : index
%27 = flow.dispatch.tensor.load %24, offsets = [0, 0, 0, 0], sizes = [%21, 16, 32, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x16x32x128xf16>>{%21} -> tensor<?x16x32x128xf16>
%28 = flow.dispatch.tensor.load %22, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xf16>> -> tensor<4096x32x128xf16>
@@ -2751,12 +2670,10 @@
// -----
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 16, 0], [16, 8, 0], [0, 0, 2]]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
@@ -2770,11 +2687,11 @@
builtin.module {
func.func @reshape_matmul() attributes {translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
: !flow.dispatch.tensor<readonly:tensor<64x2x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1)
: !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2)
: !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 2, 256], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<64x2x256xf32>> -> tensor<64x2x256xf32>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
index 982c6ab..55b6ffd 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-type-propagation))" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @generic_op_illegal_operand() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%3 = arith.trunci %2 : tensor<?xi8> to tensor<?xi1>
%4 = tensor.empty(%d) : tensor<?xi8>
@@ -25,8 +23,8 @@
return
}
// CHECK-LABEL: func.func @generic_op_illegal_operand()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -40,16 +38,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @generic_op_illegal_operand_i7() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%3 = arith.trunci %2 : tensor<?xi8> to tensor<?xi7>
%4 = tensor.empty(%d) : tensor<?xi8>
@@ -65,8 +61,8 @@
return
}
// CHECK-LABEL: func.func @generic_op_illegal_operand_i7()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -80,16 +76,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @generic_op_illegal_operand_i33() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi64>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi64>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi64>>{%d} -> tensor<?xi64>
%3 = arith.trunci %2 : tensor<?xi64> to tensor<?xi33>
%4 = tensor.empty(%d) : tensor<?xi64>
@@ -105,8 +99,8 @@
return
}
// CHECK-LABEL: func.func @generic_op_illegal_operand_i33()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xi64>
// CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -120,16 +114,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @generic_op_illegal_result() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%3 = tensor.empty(%d) : tensor<?xi1>
%4 = linalg.generic {
@@ -145,8 +137,8 @@
return
}
// CHECK-LABEL: func.func @generic_op_illegal_result()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xi8>
// CHECK: %[[GENERIC:.+]] = linalg.generic
@@ -160,18 +152,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_extract() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%3 = tensor.extract_slice %2[%offset] [%size] [1] : tensor<?xi8> to tensor<?xi8>
%4 = arith.trunci %3 : tensor<?xi8> to tensor<?xi1>
@@ -180,28 +170,26 @@
return
}
// CHECK-LABEL: func.func @tensor_extract()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK: %[[EXTRACT:.+]] = tensor.extract_slice %[[INTENSOR]]
// CHECK: flow.dispatch.tensor.store %[[EXTRACT]], %[[OUT]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_insert() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%size = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%3 = flow.dispatch.tensor.load %0, offsets = [%offset], sizes=[%size], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%5 = arith.trunci %3 : tensor<?xi8> to tensor<?xi1>
@@ -212,9 +200,9 @@
return
}
// CHECK-LABEL: func.func @tensor_insert()
-// CHECK-DAG: %[[IN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[IN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[IN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[IN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[IN1TENSOR:.+]] = flow.dispatch.tensor.load %[[IN1]]
// CHECK-DAG: %[[IN2TENSOR:.+]] = flow.dispatch.tensor.load %[[IN2]]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[IN1TENSOR]] into %[[IN2TENSOR]]
@@ -222,18 +210,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @for_loop() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%lb = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%step = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%3 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d} -> tensor<?xi8>
%4 = arith.trunci %2 : tensor<?xi8> to tensor<?xi1>
@@ -249,8 +235,8 @@
return
}
// CHECK-LABEL: func.func @for_loop()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]
// CHECK-DAG: %[[OUTTENSOR:.+]] = flow.dispatch.tensor.load %[[OUT]]
// CHECK: %[[FOR:.+]] = scf.for
@@ -262,14 +248,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @fill_op() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%d}
%1 = tensor.empty(%d) : tensor<?xi1>
%false = arith.constant false
%2 = linalg.fill ins(%false : i1) outs(%1 : tensor<?xi1>) -> tensor<?xi1>
@@ -278,7 +262,7 @@
return
}
// CHECK-LABEL: func.func @fill_op()
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-DAG: %[[INIT:.+]] = tensor.empty
// CHECK-DAG: %[[FALSE:.+]] = arith.constant false
// CHECK-DAG: %[[EXT_SCALAR:.+]] = arith.extui %[[FALSE]]
@@ -289,16 +273,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0) -> (d0)>
func.func @constant_op() {
- %a = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
- %b = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
- %c = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<4xi32>>
+ %a = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
+ %b = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
+ %c = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<4xi32>>
%at = flow.dispatch.tensor.load %a, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi32>> -> tensor<4xi32>
%bt = flow.dispatch.tensor.load %b, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi32>> -> tensor<4xi32>
%select = arith.constant dense<[true, false, true, false]> : tensor<4xi1>
@@ -326,16 +308,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0) -> (d0)>
func.func @constant_splat_op() {
- %a = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
- %b = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
- %c = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<writeonly:tensor<4xi32>>
+ %a = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
+ %b = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4xi32>>
+ %c = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<writeonly:tensor<4xi32>>
%at = flow.dispatch.tensor.load %a, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi32>> -> tensor<4xi32>
%bt = flow.dispatch.tensor.load %b, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4xi32>> -> tensor<4xi32>
%select = arith.constant dense<true> : tensor<4xi1>
@@ -357,18 +337,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @tensor_extract() {
%c0 = arith.constant 0 : index
%c13 = arith.constant 13 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<14xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<14xi8>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [14], strides = [1]
: !flow.dispatch.tensor<readonly:tensor<14xi8>> -> tensor<14xi8>
@@ -389,7 +367,7 @@
return
}
// CHECK-LABEL: func @tensor_extract()
-// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: !flow.dispatch.tensor<readonly:tensor<14xi8>>
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[BINDING]]
// CHECK: %[[EXTRACTED:.+]] = tensor.extract %[[LOAD]]
@@ -425,18 +403,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @scatter() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x1xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<3xi8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x1xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<3xi8>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [8], strides = [1] : !flow.dispatch.tensor<readonly:tensor<8xi8>> -> tensor<8xi8>
%4 = arith.trunci %3 : tensor<8xi8> to tensor<8xi1>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x1xi32>> -> tensor<8x1xi32>
@@ -453,9 +429,9 @@
}
// CHECK-LABEL: func.func @scatter()
-// CHECK-DAG: %[[UPDATES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[UPDATES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[INDICES:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[UPDATES_TENSOR:.+]] = flow.dispatch.tensor.load %[[UPDATES]]
// CHECK-DAG: %[[INDICES_TENSOR:.+]] = flow.dispatch.tensor.load %[[INDICES]]
// CHECK-DAG: %[[OUT_TENSOR:.+]] = flow.dispatch.tensor.load %[[OUT]]
@@ -472,16 +448,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sort() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1xi8>> -> tensor<1xi8>
%3 = arith.trunci %2 : tensor<1xi8> to tensor<1xi1>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<1xi32>> -> tensor<1xi32>
@@ -496,8 +470,8 @@
// CHECK-LABEL: func.func @sort()
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[A_TENSOR:.+]] = flow.dispatch.tensor.load %[[A]]
// CHECK-DAG: %[[B_TENSOR:.+]] = flow.dispatch.tensor.load %[[B]]
// CHECK: %[[SORT:.+]]:2 = iree_linalg_ext.sort dimension(0)
@@ -511,16 +485,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sort_secondary() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xi8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xi8>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1xi32>> -> tensor<1xi32>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<1xi8>> -> tensor<1xi8>
%4 = arith.trunci %3 : tensor<1xi8> to tensor<1xi1>
@@ -536,8 +508,8 @@
// CHECK-LABEL: func.func @sort_secondary()
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[A_TENSOR:.+]] = flow.dispatch.tensor.load %[[A]]
// CHECK-DAG: %[[B_TENSOR:.+]] = flow.dispatch.tensor.load %[[B]]
// CHECK: %[[SORT:.+]]:2 = iree_linalg_ext.sort dimension(0)
@@ -549,16 +521,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @branch_op() {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<i8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<i8>>
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i8
%4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i8>> -> tensor<i8>
%5 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i8>> -> tensor<i8>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir
index 367a1fc..d095933 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/type_propagation_packing.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-type-propagation))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @generic_op_i4() {
%d = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi4>>{%d}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi4>>{%d}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xi4>>{%d}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xi4>>{%d}
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes=[%d], strides=[1] : !flow.dispatch.tensor<readonly:tensor<?xi4>>{%d} -> tensor<?xi4>
%4 = tensor.empty(%d) : tensor<?xi4>
%5 = linalg.generic {
@@ -25,8 +23,8 @@
}
// CHECK-LABEL: func.func @generic_op_i4()
-// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[IN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[OUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[INTENSOR:.+]] = flow.dispatch.tensor.load %[[IN]]{{.+}} -> tensor<?xi4>
// CHECK-DAG: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor<?xi4>
// CHECK: %[[GENERIC:.+]] = linalg.generic
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
index 1143c67..dcab0b7 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
@@ -121,8 +121,7 @@
if (!bufferMemrefType)
continue;
- if (bufferSubspanOp.getSet() != subspanOp.getSet() ||
- bufferSubspanOp.getBinding() != subspanOp.getBinding() ||
+ if (bufferSubspanOp.getBinding() != subspanOp.getBinding() ||
bufferSubspanOp.getDescriptorType() != subspanOp.getDescriptorType() ||
bufferSubspanOp.getByteOffset() != subspanOp.getByteOffset() ||
!llvm::equal(bufferSubspanOp.getDynamicDims(),
@@ -139,7 +138,7 @@
// Just change the result type of the InterfaceBindingSubspanOp.
Value buffer = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
subspanOp->getLoc(), memRefType, subspanOp.getLayout(),
- subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(),
+ subspanOp.getBinding(), subspanOp.getByteOffset(),
subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(),
subspanOp.getDescriptorFlagsAttr());
rewriter.create<memref::AssumeAlignmentOp>(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 21847b4..e006e1d 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -309,9 +309,10 @@
subspanOp,
"failed to convert interface.binding.subspan result to memref type");
}
- auto memRefDesc = abi.loadBinding(
- subspanOp, subspanOp.getFlatBindingIndex(), operands.getByteOffset(),
- memRefType, operands.getDynamicDims(), rewriter);
+ auto memRefDesc =
+ abi.loadBinding(subspanOp, subspanOp.getBinding().getSExtValue(),
+ operands.getByteOffset(), memRefType,
+ operands.getDynamicDims(), rewriter);
rewriter.replaceOp(subspanOp, {memRefDesc});
return success();
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 6c9af29..154ab37 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -277,13 +277,13 @@
getMemberOf("workgroup_size_x", getUint32T(), &offsetInBits),
getMemberOf("workgroup_size_y", getUint32T(), &offsetInBits),
getMemberOf("workgroup_size_z", getUint16T(), &offsetInBits),
- getMemberOf("push_constant_count", getUint16T(), &offsetInBits),
+ getMemberOf("constant_count", getUint16T(), &offsetInBits),
getMemberOf("workgroup_count_x", getUint32T(), &offsetInBits),
getMemberOf("workgroup_count_y", getUint32T(), &offsetInBits),
getMemberOf("workgroup_count_z", getUint16T(), &offsetInBits),
getMemberOf("max_concurrency", getUint8T(), &offsetInBits),
getMemberOf("binding_count", getUint8T(), &offsetInBits),
- getMemberOf("push_constants",
+ getMemberOf("constants",
getPtrOf(getConstOf(getArrayOf(getUint32T(), 64))),
&offsetInBits),
getMemberOf(
@@ -412,7 +412,7 @@
fieldTypes.push_back(uint32Type);
fieldTypes.push_back(uint16Type);
- // uint16_t push_constant_count;
+ // uint16_t constant_count;
fieldTypes.push_back(uint16Type);
// uint32_t workgroup_count_x;
@@ -428,7 +428,7 @@
// uint8_t binding_count;
fieldTypes.push_back(uint8Type);
- // const uint32_t * push_constants;
+ // const uint32_t * constants;
// void *const * binding_ptrs;
// const size_t * binding_lengths;
fieldTypes.push_back(opaquePtrType);
@@ -698,11 +698,11 @@
Value HALDispatchABI::loadPushConstantCount(Operation *forOp,
OpBuilder &builder) {
auto countValue =
- loadFieldValue(forOp, DispatchStateField::push_constant_count, builder);
+ loadFieldValue(forOp, DispatchStateField::constant_count, builder);
auto resultValue = castValueToType(
forOp->getLoc(), countValue,
typeConverter->convertType(builder.getIndexType()), builder);
- return buildValueDI(forOp, resultValue, "push_constant_count", di.getSizeT(),
+ return buildValueDI(forOp, resultValue, "constant_count", di.getSizeT(),
builder);
}
@@ -710,7 +710,7 @@
Type resultType, OpBuilder &builder) {
auto loc = forOp->getLoc();
auto constantsPtrValue =
- loadFieldValue(forOp, DispatchStateField::push_constants, builder);
+ loadFieldValue(forOp, DispatchStateField::constants, builder);
auto pushConstantType = IntegerType::get(context, 32);
Value constantPtrValue = builder.create<LLVM::GEPOp>(
loc, constantsPtrValue.getType(), pushConstantType, constantsPtrValue,
@@ -719,8 +719,7 @@
builder.create<LLVM::LoadOp>(loc, pushConstantType, constantPtrValue);
auto resultValue = castValueToType(loc, constantValue, resultType, builder);
return buildValueDI(forOp, resultValue,
- StringRef("push_constant[") + std::to_string(offset) +
- "]",
+ StringRef("constant[") + std::to_string(offset) + "]",
di.getBasicType(resultType), builder);
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
index d87b063..ab341cd 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.h
@@ -156,13 +156,13 @@
/*uint32_t*/ workgroup_size_x,
/*uint32_t*/ workgroup_size_y,
/*uint16_t*/ workgroup_size_z,
- /*uint16_t*/ push_constant_count,
+ /*uint16_t*/ constant_count,
/*uint32_t*/ workgroup_count_x,
/*uint32_t*/ workgroup_count_y,
/*uint16_t*/ workgroup_count_z,
/*uint8_t*/ max_concurrency,
/*uint8_t*/ binding_count,
- /*intptr_t*/ push_constants,
+ /*intptr_t*/ constants,
/*intptr_t*/ binding_ptrs,
/*intptr_t*/ binding_lengths,
};
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir
index 327ef62..839e38b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_dotprod_vector_lowering.mlir
@@ -5,11 +5,9 @@
data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
native_vector_size = 16 : index,
target_triple = "aarch64-none-linux-android29"}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @mmt4d_kernel_dispatch() attributes {hal.executable.target = #target} {
%c0_i8 = arith.constant 0 : i8
@@ -19,11 +17,11 @@
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x2x8x4xi8>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x2x8x4xi8>
memref.assume_alignment %0, 64 : memref<1x2x8x4xi8>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) : memref<1x2x8x4xi8>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) : memref<1x2x8x4xi8>
memref.assume_alignment %1, 64 : memref<1x2x8x4xi8>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c128) : memref<1x1x8x8xi32>
memref.assume_alignment %2, 64 : memref<1x1x8x8xi32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir
index 814138a..38dd757 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/aarch64_vector_lowering.mlir
@@ -2,12 +2,10 @@
// RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-llvmcpu-mmt4d-vector-lowering{vector-contract-custom-kernels=false}))" --split-input-file | FileCheck %s -check-prefix=CHECK-KERNEL-OFF
// RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-llvmcpu-mmt4d-vector-lowering{vector-contract-custom-kernels=true}))" --split-input-file | FileCheck %s -check-prefix=CHECK-KERNEL-ON
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -24,9 +22,9 @@
%cst_0 = arith.constant 0.000000e+00 : f32
%c384 = arith.constant 384 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -71,9 +69,9 @@
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
-// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
-// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
-// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+// CHECK: %[[DST:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
// CHECK: %[[DST_TILE_INIT:.+]] = tensor.empty()
// CHECK: scf.for %[[I_IDX:.+]] = {{.*}} to %[[C384]] step %{{[0-9]*}} {
// CHECK: %[[LHS_TILE:.+]] = flow.dispatch.tensor.load %[[LHS]], {{.*}} -> tensor<64x512xf32>
@@ -95,15 +93,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>,
- #hal.descriptor_set.binding<5, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -125,12 +121,12 @@
%c1835008 = arith.constant 1835008 : index
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%c1835008) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) offset(%c1835008) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x512xf32>> -> tensor<2x512xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir
index 20c5933..4b3902c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/apply_scale_lowering.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-llvm))))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", {
cpu_features = "+m,+a,+f,+d,+c",
@@ -27,8 +25,8 @@
%cst = arith.constant dense<19689> : vector<2xi32>
%cst_0 = arith.constant dense<15> : vector<2xi8>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32>
%2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32>
%3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32>
vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32>
@@ -48,11 +46,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", {
cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+v",
@@ -75,8 +71,8 @@
%cst = arith.constant dense<19689> : vector<2xi32>
%cst_0 = arith.constant dense<15> : vector<2xi8>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32>
%2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32>
%3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32>
vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32>
@@ -94,11 +90,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", {
cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve64x",
@@ -121,8 +115,8 @@
%cst = arith.constant dense<19689> : vector<2xi32>
%cst_0 = arith.constant dense<15> : vector<2xi8>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32>
%2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32>
%3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32>
vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32>
@@ -140,11 +134,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", {
cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve32x",
@@ -167,8 +159,8 @@
%cst = arith.constant dense<19689> : vector<2xi32>
%cst_0 = arith.constant dense<15> : vector<2xi8>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32>
%2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32>
%3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32>
vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32>
@@ -193,11 +185,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_64", {
cpu_features = "+m,+a,+f,+d,+c,+zvl512b,+zve32f",
@@ -220,8 +210,8 @@
%cst = arith.constant dense<19689> : vector<2xi32>
%cst_0 = arith.constant dense<15> : vector<2xi8>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<2xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2xi32>
%2 = vector.load %0[%c0] : memref<2xi32>, vector<2xi32>
%3 = tosa.apply_scale %2, %cst, %cst_0 {double_round = false} : (vector<2xi32>, vector<2xi32>, vector<2xi8>) -> vector<2xi32>
vector.store %3, %1[%c0] : memref<2xi32>, vector<2xi32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
index 6268c4f..f9189db 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/convert_to_llvm.mlir
@@ -43,11 +43,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @interleave_and_bitcast_lowering() {
%cst = arith.constant dense<4> : vector<4x2xi8>
@@ -58,8 +56,8 @@
%c3 = arith.constant 3 : index
%c4096 = arith.constant 4096 : index
%c8192 = arith.constant 8192 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c4096) flags(ReadOnly) : memref<128xi8, strided<[1], offset: 4096>>
- %out_buffer = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c8192) : memref<256x64xi4, strided<[64, 1], offset: 8192>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c4096) flags(ReadOnly) : memref<128xi8, strided<[1], offset: 4096>>
+ %out_buffer = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c8192) : memref<256x64xi4, strided<[64, 1], offset: 8192>>
%2 = vector.load %0[%c0] : memref<128xi8, strided<[1], offset: 4096>>, vector<2xi8>
%3 = vector.bitcast %2 : vector<2xi8> to vector<4xi4>
%4 = vector.insert %3, %cst_0 [3] : vector<4xi4> into vector<4x4xi4>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
index 8675111..bda5e45 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_bindings.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --iree-convert-to-llvm --split-input-file %s | FileCheck %s --dump-input=always
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: llvm.func @binding_ptrs(
@@ -19,7 +17,7 @@
// CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr
%c72 = arith.constant 72 : index
%c128 = arith.constant 128 : index
- %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%c72) : memref<?x2xf32, strided<[2, 1], offset: 18>>{%c128}
+ %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%c72) : memref<?x2xf32, strided<[2, 1], offset: 18>>{%c128}
// CHECK: %[[OFFSET_PTR0:.+]] = llvm.getelementptr %[[BASE_PTR]][18]
// CHECK: %[[OFFSET_D0:.+]] = llvm.mul %[[C5]], %[[C2]]
@@ -40,11 +38,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: llvm.func @binding_ptrs_dynamic(
@@ -80,7 +76,7 @@
// CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10]
// CHECK: %[[ARRAY_PTR:.+]] = llvm.getelementptr %[[BINDING_PTRS]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.ptr
// CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr
- %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>{%dim0, %dim1, %dim2}
+ %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>{%dim0, %dim1, %dim2}
// CHECK: %[[BASE_BIT_OFFSET:.+]] = llvm.mul %[[OFFSET_ZEXT]], %[[C8]]
// CHECK: %[[BASE_OFFSET:.+]] = llvm.udiv %[[BASE_BIT_OFFSET]], %[[C32]]
@@ -108,11 +104,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: llvm.func @binding_ptrs_sub_byte_dynamic(
@@ -131,7 +125,7 @@
// CHECK: %[[BINDING_PTRS:.+]] = llvm.extractvalue %[[STATE3]][10]
// CHECK: %[[ARRAY_PTR:.+]] = llvm.getelementptr %[[BINDING_PTRS]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.ptr
// CHECK: %[[BASE_PTR:.+]] = llvm.load %[[ARRAY_PTR]] : !llvm.ptr -> !llvm.ptr
- %memref = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%offset) : memref<?xi4, strided<[1], offset: ?>>{%dim0}
+ %memref = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%offset) : memref<?xi4, strided<[1], offset: ?>>{%dim0}
// CHECK: %[[BASE_BIT_OFFSET:.+]] = llvm.mul %[[OFFSET_ZEXT]], %[[C8]]
// CHECK: %[[BASE_OFFSET:.+]] = llvm.udiv %[[BASE_BIT_OFFSET]], %[[C4]]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir
index cccef04..9be75c9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/hal_interface_constants.mlir
@@ -1,9 +1,7 @@
// RUN: iree-opt --iree-convert-to-llvm --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: llvm.func @constant_values
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir
index 7601ed0..db93959 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/illegal_configuration.mlir
@@ -1,20 +1,18 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --verify-diagnostics --split-input-file %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = []>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{expected four tiling levels, got 0}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -22,21 +20,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[4, 8], [8, 8, 0], [0, 0, 8], [0, 0, 0]], native_vector_size = [0, 0, 4]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{native_vector_size must be empty}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -44,12 +40,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [8, 32, 16], [0, 0, 16], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
@@ -57,9 +51,9 @@
module {
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{expected only parallel dims to be set in the second tiling level, got 2-th tile size set}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -68,21 +62,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [8, 0, 0], [0, 16, 16], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{only reduction dims to be set in the third tiling level, got 1-th tile size set}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -90,21 +82,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [{sizes = [4, 8], interchange = [1]}, [8, 8, 0], [0, 0, 8], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{expected [0, 2) to be set exactly once in interchange #0}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -112,21 +102,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 7, 7, 64, 0, 0, 0], [6, 1, 7, 32, 0, 0, 0], [0, 0, 0, 0, 3, 3, 4], [0, 0, 0, 0, 0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<36x9x9x512xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x3x512x512xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<36x7x7x512xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<36x9x9x512xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x3x512x512xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<36x7x7x512xf32>
// expected-error @+1 {{can't decompose the conv op}}
linalg.conv_2d_nhwc_hwcf {lowering_config = #config} ins(%0, %1 : memref<36x9x9x512xf32>, memref<3x3x512x512xf32>) outs(%2 : memref<36x7x7x512xf32>)
return
@@ -134,12 +122,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 7, 64, 0, 0], [1, 1, 7, 8, 0, 0], [0, 0, 0, 0, 5, 5], [0, 0, 0, 0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUConvTileAndDecomposeExpert>
@@ -147,9 +133,9 @@
module {
func.func @illegal() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32>
// expected-error @+1 {{can't decompose the conv op}}
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : tensor<2xi64>, lowering_config = #config, strides = dense<1> : tensor<2xi64>} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>)
return
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir
index 10aa93a..a44a370 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-peel))" -split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @peel_static_matmul() {
%c16 = arith.constant 16 : index
@@ -16,9 +14,9 @@
%c512 = arith.constant 512 : index
%c128 = arith.constant 128 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x49xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<49x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x49xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<49x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir
index f9eae56..7769fc6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_arm_sme_streaming_mode_tests.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --iree-codegen-linalg-to-llvm-pipeline=enable-arm-sme --split-input-file %s | FileCheck %s
// RUN: iree-opt --iree-codegen-linalg-to-llvm-pipeline=enable-arm-sme --iree-llvmcpu-force-arm-streaming --split-input-file %s | FileCheck %s -check-prefixes=FORCE-ARM-STREAMING
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
module {
module {
@@ -16,7 +14,7 @@
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xf32>>
%2 = tensor.empty() : tensor<1xf32>
%3 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [1], [0], [0]]>}
ins(%cst : f32) outs(%2 : tensor<1xf32>) -> tensor<1xf32>
@@ -39,12 +37,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
module {
module {
@@ -54,7 +50,7 @@
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1xf32>>
%2 = tensor.empty() : tensor<1xf32>
%3 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0], [[1]], [0], [0]]>}
ins(%cst : f32) outs(%2 : tensor<1xf32>) -> tensor<1xf32>
@@ -78,12 +74,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
module {
module {
@@ -93,7 +87,7 @@
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<100x100xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<100x100xf32>>
%2 = tensor.empty() : tensor<100x100xf32>
%3 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0], [[4], [4]], [0, 0], [0, 0]]>}
ins(%cst : f32) outs(%2 : tensor<100x100xf32>) -> tensor<100x100xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir
index e26a427..faca283 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1)>
@@ -15,9 +13,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf32>> -> tensor<512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%5 = tensor.empty() : tensor<24x512x16x1xf32>
@@ -47,12 +45,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1)>
@@ -62,9 +58,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<384x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>> -> tensor<24x32x16x16xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf32>> -> tensor<512xf32>
%5 = tensor.empty() : tensor<384x512xf32>
@@ -93,19 +89,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
module {
func.func @unaligned_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<383x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<383x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [383, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<383x512xf32>> -> tensor<383x512xf32>
%3 = tensor.empty() : tensor<24x512x16x1xf32>
%pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir
index b619559..71c88e4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_conv_tests.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
func.func @pad_conv_2d_nchw_fchw_1x320x64x64x320x3x3() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
@@ -23,12 +21,12 @@
%7 = arith.index_castui %2 {stream.alignment = 256 : index, stream.values = [10507520 : index, 21488640 : index]} : i32 to index
%8 = arith.index_castui %3 {stream.alignment = 256 : index, stream.values = [10508800 : index, 21489920 : index]} : i32 to index
%9 = arith.index_castui %4 {stream.alignment = 128 : index, stream.values = [10486400 : index, 10487680 : index]} : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c5243520) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320x64x64xf32>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf32>>
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
- %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c5243520) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320x64x64xf32>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf32>>
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x320xf32>>
+ %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<1x320x64x64xf32>>
%16 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [1, 320, 64, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x320x64x64xf32>> -> tensor<1x320x64x64xf32>
%17 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf32>> -> tensor<320x320x3x3xf32>
%18 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [1, 320], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x320xf32>> -> tensor<1x320xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir
index ab8b261..899c422 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pad_tests.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --pass-pipeline="builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @pad_only_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c634816 = arith.constant 634816 : index
%c3846080 = arith.constant 3846080 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor<writeonly:tensor<1x114x114x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor<writeonly:tensor<1x114x114x64xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>> -> tensor<1x112x112x64xf32>
%padded = tensor.pad %2 low[0, 1, 1, 0] high[0, 1, 1, 0] {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index):
@@ -47,12 +45,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
@@ -64,10 +60,10 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 1.001000e-05 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c802816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x56x56x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c72545728) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x256x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c72676800) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x30x30x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c802816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x56x56x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c72545728) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x256x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c72676800) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x30x30x128xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 56, 56, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x56x56x256xf32>> -> tensor<1x56x56x256xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1, 1, 256, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x256x128xf32>> -> tensor<1x1x256x128xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
@@ -128,20 +124,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @pad_consumer_fusion_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x14x14x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x256x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x14x14x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x14x14x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x256x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x14x14x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x14x14x256xf32>> -> tensor<1x14x14x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 256, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x256x256xf32>> -> tensor<3x3x256x256xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [1, 14, 14, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<1x14x14x256xf32>> -> tensor<1x14x14x256xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
index e8602dc..d3c411e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_peel_and_vectorize_tests.mlir
@@ -1,20 +1,18 @@
// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-llvmcpu-lower-executable-target))' -split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {native_vector_size = 64}>
func.func @no_peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<64x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<64x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x64xf32>> -> tensor<128x64xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [64, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x512xf32>> -> tensor<64x512xf32>
%5 = tensor.empty() : tensor<128x512xf32>
@@ -34,21 +32,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[65, 65, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {native_vector_size = 64}>
func.func @peel_static_matmul() attributes {hal.executable.target = #executable_target_system_elf_x86_64_, translation_info = #translation} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x49xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<49x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x49xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<49x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x49xf32>> -> tensor<128x49xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [49, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<49x512xf32>> -> tensor<49x512xf32>
%5 = tensor.empty() : tensor<128x512xf32>
@@ -80,12 +76,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
@@ -98,9 +92,9 @@
%3 = arith.index_cast %0 : i32 to index
%4 = arith.index_cast %1 : i32 to index
%5 = arith.index_cast %2 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%4, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3} -> tensor<?x?xf32>
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%3, %5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5} -> tensor<?x?xf32>
%11 = tensor.empty(%4, %5) : tensor<?x?xf32>
@@ -140,12 +134,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 0], [8, [32], 0], [0, 0, 1], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling = true}>
@@ -159,9 +151,9 @@
%3 = arith.index_cast %0 : i32 to index
%4 = arith.index_cast %1 : i32 to index
%5 = arith.index_cast %2 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%4, %5}
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [%4, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%4, %3} -> tensor<?x?xf32>
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [%3, %5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %5} -> tensor<?x?xf32>
%11 = tensor.empty(%4, %5) : tensor<?x?xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
index 3d4f9c7..639f813 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_split_reduction_tests.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=false --split-input-file %s | FileCheck %s
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --iree-llvmcpu-reassociate-fp-reductions=true --split-input-file %s | FileCheck %s --check-prefix=REORDERCHECK
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -15,8 +13,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0> : tensor<1024x512xi32>
%c1_i32 = arith.constant 1 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>> -> tensor<1024x512x256xi32>
%3 = tensor.empty() : tensor<1024x512xi32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024x512xi32>) {
@@ -48,11 +46,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -62,8 +58,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0.000000e+00> : tensor<1024x512xf32>
%cst_0 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x256xf32>> -> tensor<1024x512x256xf32>
%3 = tensor.empty() : tensor<1024x512xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xf32>) outs(%cst : tensor<1024x512xf32>) {
@@ -98,11 +94,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -112,8 +106,8 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x?x256xi32>>{%1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x?xi32>>{%1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x?x256xi32>>{%1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x?xi32>>{%1}
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [1024, %1, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x?x256xi32>>{%1} -> tensor<1024x?x256xi32>
%5 = tensor.empty(%1) : tensor<1024x?xi32>
%6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<1024x?xi32>) -> tensor<1024x?xi32>
@@ -140,11 +134,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -152,8 +144,8 @@
func.func @split_reduction_innermost_reduction_next_imperfect_tiling_supported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0> : tensor<1024x513xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x513x256xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x513xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x513x256xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x513xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 513, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x513x256xi32>> -> tensor<1024x513x256xi32>
%3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x513x256xi32>) outs(%cst : tensor<1024x513xi32>) {
^bb0(%in: i32, %out: i32):
@@ -178,11 +170,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -192,8 +182,8 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x?xi32>>{%1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x?xi32>>{%1}
%4 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0], sizes = [1024, 512, %1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x?xi32>>{%1} -> tensor<1024x512x?xi32>
%5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%4 : tensor<1024x512x?xi32>) outs(%cst : tensor<1024x512xi32>) {
^bb0(%in: i32, %out: i32):
@@ -209,11 +199,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -221,8 +209,8 @@
func.func @split_reduction_innermost_imperfect_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0> : tensor<1024x512xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x257xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x257xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 257], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x257xi32>> -> tensor<1024x512x257xi32>
%3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x257xi32>) outs(%cst : tensor<1024x512xi32>) {
^bb0(%in: i32, %out: i32):
@@ -238,11 +226,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
@@ -250,8 +236,8 @@
func.func @split_reduction_not_innermost_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0> : tensor<1024x256xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x256xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x256xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>> -> tensor<1024x512x256xi32>
%3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024x256xi32>) {
^bb0(%in: i32, %out: i32):
@@ -268,11 +254,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -280,8 +264,8 @@
func.func @split_reduction_double_reduction_unsupported() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0> : tensor<1024xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1024, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512x256xi32>> -> tensor<1024x512x256xi32>
%3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction", "reduction"]} ins(%2 : tensor<1024x512x256xi32>) outs(%cst : tensor<1024xi32>) {
^bb0(%in: i32, %out: i32):
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir
index f646cee..4d91d67 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir
@@ -6,11 +6,9 @@
// and the conversion to destination passing style. Running CSE
// before hoists the fill and the empty out of the loop causing
// issues with the conversion.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -23,8 +21,8 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 10752 : index]} : i32 to index
%3 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [10752 : index, 21504 : index]} : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%2) : !flow.dispatch.tensor<readonly:tensor<7x384xf32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%3) : !flow.dispatch.tensor<writeonly:tensor<7xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%2) : !flow.dispatch.tensor<readonly:tensor<7x384xf32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%3) : !flow.dispatch.tensor<writeonly:tensor<7xf32>>
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [7, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<7x384xf32>> -> tensor<7x384xf32>
%7 = tensor.empty() : tensor<7xf32>
%8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<7xf32>) -> tensor<7xf32>
@@ -51,12 +49,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -70,10 +66,10 @@
%5 = arith.index_castui %1 {stream.alignment = 64 : index, stream.values = [576704 : index, 1763072 : index]} : i32 to index
%6 = arith.index_castui %2 {stream.alignment = 64 : index, stream.values = [908480 : index, 2094848 : index]} : i32 to index
%7 = arith.index_castui %3 {stream.alignment = 128 : index, stream.values = [2304 : index, 134016 : index]} : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x576xf32>>
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<576x144xf32>>
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x144xf32>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<1x144xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x576xf32>>
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<576x144xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x144xf32>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<1x144xf32>>
%12 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [1, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x576xf32>> -> tensor<1x576xf32>
%13 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [576, 144], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<576x144xf32>> -> tensor<576x144xf32>
%14 = flow.dispatch.tensor.load %10, offsets = [0, 0], sizes = [1, 144], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x144xf32>> -> tensor<1x144xf32>
@@ -103,12 +99,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @batch_matmul_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
@@ -126,9 +120,9 @@
%9 = arith.index_cast %3 : i32 to index
%10 = arith.index_cast %4 : i32 to index
%11 = arith.index_cast %5 : i32 to index
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%6, %7, %9}
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%10, %11, %8}
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%6, %7, %8}
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%6, %7, %9}
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%10, %11, %8}
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%6, %7, %8}
%15 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0], sizes = [%6, %7, %9], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%6, %7, %9} -> tensor<?x?x?xf32>
%16 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [%10, %11, %8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%10, %11, %8} -> tensor<?x?x?xf32>
%17 = tensor.empty(%6, %7, %8) : tensor<?x?x?xf32>
@@ -142,20 +136,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0 * 1536 + d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
func.func @check_buffer_ops_vectorization() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x1024xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x1024xi32>
memref.assume_alignment %0, 64 : memref<128x1024xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x1536xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128x1536xi32>
memref.assume_alignment %1, 64 : memref<128x1536xi32>
%subview = memref.subview %1[0, 0] [128, 1024] [1, 1] : memref<128x1536xi32> to memref<128x1024xi32, #map>
linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<128x1024xi32>) outs(%subview : memref<128x1024xi32, #map>) {
@@ -171,12 +163,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2, d3) -> (d3)>
@@ -188,9 +178,9 @@
%cst_2 = arith.constant 0.166666672 : f32
%cst_3 = arith.constant dense<0.000000e+00> : tensor<16xf32>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>> -> tensor<3x3x3x16xf32>
%5 = tensor.empty() : tensor<1x112x112x16xf32>
@@ -221,15 +211,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>,
- #hal.descriptor_set.binding<5, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -238,12 +226,12 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 1.000000e-03 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x256xf32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(5) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x256xf32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(5) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x256xf32>>
%6 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x128xf32>> -> tensor<64x128xf32>
%7 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%8 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [256], strides = [1] : !flow.dispatch.tensor<readonly:tensor<256xf32>> -> tensor<256xf32>
@@ -268,19 +256,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf", ukernels = "mmt4d"}>
func.func @ukernel_dispatch() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4x8x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x4x16x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x16x8x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4x8x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x4x16x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x16x8x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 4, 8, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x8x32xf32>> -> tensor<2x4x8x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [16, 4, 16, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4x16x32xf32>> -> tensor<16x4x16x32xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 16, 8, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<2x16x8x16xf32>> -> tensor<2x16x8x16xf32>
@@ -301,12 +287,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf", ukernels = "all"}>
#map = affine_map<()[s0, s1, s2] -> (s0 - s1 * (s0 ceildiv s2), s0 ceildiv s2)>
@@ -317,9 +301,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%2}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?xf32>>{%2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%2}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?xf32>>{%2}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%7 = affine.min #map()[%2, %workgroup_id_x, %workgroup_count_x]
@@ -332,11 +316,11 @@
return
}
// CHECK: func @dispatch
-// CHECK: %[[INPUT0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[INPUT0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK-SAME: memref<?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[INPUT1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[INPUT1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-SAME: memref<?xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-SAME: memref<?xf32, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[OFFSET:.+]] = affine.apply
// CHECK-DAG: %[[SIZE:.+]] = affine.min
@@ -349,12 +333,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 2, 0, 0], [1, 1, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]]>
#config1 = #iree_codegen.lowering_config<tile_sizes = [[1, 2, 0, 0, 0, 0], [1, 1, 0, 1, 128, 0], [0, 0, 1, 0, 0, 1]]>
@@ -364,9 +346,9 @@
%c1024 = arith.constant 1024 : index
%c132096 = arith.constant 132096 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x256x1x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1024) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4x256x128x1xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c132096) : !flow.dispatch.tensor<writeonly:tensor<1x4x1x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x256x1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1024) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4x256x128x1xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c132096) : !flow.dispatch.tensor<writeonly:tensor<1x4x1x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 256, 1, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x256x1x1xf32>> -> tensor<1x256x1x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 256, 128, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x256x128x1xi8>> -> tensor<4x256x128x1xi8>
%5 = tensor.empty() : tensor<1x4x1x128xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir
index 97ef7fc..77b4078 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_transpose_avx2_tests.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1, d0)>
@@ -12,8 +10,8 @@
func.func @transpose_10_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) {
@@ -37,11 +35,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
@@ -49,8 +45,8 @@
func.func @transpose_021_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x128x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x128x96xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>> -> tensor<64x96x128xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 128, 96], strides = [1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<64x128x96xf32>> -> tensor<64x128x96xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<64x128x96xf32>) {
@@ -74,11 +70,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
@@ -86,8 +80,8 @@
func.func @transpose_201_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x64x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x64x96xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>> -> tensor<64x96x128xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 64, 96], strides = [1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<128x64x96xf32>> -> tensor<128x64x96xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<128x64x96xf32>) {
@@ -111,11 +105,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>
@@ -123,8 +115,8 @@
func.func @transpose_210_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x96x64xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>> -> tensor<64x96x128xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 96, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<128x96x64xf32>> -> tensor<128x96x64xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<128x96x64xf32>) {
@@ -148,11 +140,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
@@ -160,8 +150,8 @@
func.func @transpose_120_8x8_pattern() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<96x128x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<96x128x64xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>> -> tensor<64x96x128xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [96, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<96x128x64xf32>> -> tensor<96x128x64xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<96x128x64xf32>) {
@@ -185,11 +175,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
@@ -197,8 +185,8 @@
func.func @transpose_102() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<96x64x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<96x64x128xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 96, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x96x128xf32>> -> tensor<64x96x128xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [96, 64, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<96x64x128xf32>> -> tensor<96x64x128xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<64x96x128xf32>) outs(%3 : tensor<96x64x128xf32>) {
@@ -215,11 +203,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1, d0)>
@@ -227,8 +213,8 @@
func.func @test_no_avx2_feature() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir
index c93ab70..005492f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vector_masking_tests.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' -split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -15,9 +13,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%9 = tensor.empty(%2, %3) : tensor<?x?xf32>
@@ -43,11 +41,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -58,8 +54,8 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%2}
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%7 = tensor.empty(%2) : tensor<?xf32>
%8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<?xf32>) -> tensor<?xf32>
@@ -78,12 +74,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_32_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 32 : index, target_triple = "riscv32-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -93,9 +87,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%9 = tensor.empty(%2, %3) : tensor<?x?xf32>
@@ -121,12 +115,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -136,9 +128,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%9 = tensor.empty(%2, %3) : tensor<?x?xf32>
@@ -159,13 +151,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @mask_matmul_sve() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -174,10 +164,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -193,12 +183,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -208,9 +196,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%2, %3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %3} -> tensor<?x?xf32>
%9 = tensor.empty(%2, %3) : tensor<?x?xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir
index 1e2e60d..f6782a5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_vectorize_nd_extract_tests.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy, func.func(iree-llvmcpu-lower-executable-target))' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_riscv_64_ = #hal.executable.target<"llvm-cpu", "system-elf-riscv_64", {cpu = "generic-rv64", cpu_features = "+m,+a,+f,+d,+v", data_layout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128", native_vector_size = 64 : index, target_triple = "riscv64"}>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
@@ -19,8 +17,8 @@
%c32_i32 = arith.constant 32 : i32
%cst_2 = arith.constant 1.000000e+00 : f32
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1115136) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x33x33x21xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x257x257x21xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1115136) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x33x33x21xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x257x257x21xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 21], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x33x33x21xf32>> -> tensor<1x33x33x21xf32>
%3 = tensor.empty() : tensor<1x257x257x21xf32>
%4 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%3 : tensor<1x257x257x21xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
index 410d801..1903cab 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @matmul_tensors_default() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -15,10 +13,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -35,13 +33,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @i4_i4_i32_matmul() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -50,10 +46,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%0, %2} -> tensor<?x?xi4>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi4>>{%2, %1} -> tensor<?x?xi4>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
@@ -71,12 +67,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @batch_matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -85,9 +79,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %1, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %3, %2}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%0, %1, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %1, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %3, %2}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?xf32>>{%0, %1, %2}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [%0, %1, %3], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %1, %3} -> tensor<?x?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [%0, %3, %2], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?xf32>>{%0, %3, %2} -> tensor<?x?x?xf32>
%9 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
@@ -105,19 +99,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @matmul_static() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<240x40xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x40xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<240x40xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x40xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 240], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<196x240xf32>> -> tensor<196x240xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [240, 40], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<240x40xf32>> -> tensor<240x40xf32>
%5 = tensor.empty() : tensor<196x40xf32>
@@ -135,21 +127,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @conv_static() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c607520 = arith.constant 607520 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x51x41x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor<readonly:tensor<3x3x512x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x25x20x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x51x41x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor<readonly:tensor<3x3x512x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x25x20x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 51, 41, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x51x41x512xf32>> -> tensor<1x51x41x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 512, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x512x512xf32>> -> tensor<3x3x512x512xf32>
%5 = tensor.empty() : tensor<1x25x20x512xf32>
@@ -166,19 +156,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @restrict_num_workgroups() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x11x11x576xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<5x5x576xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x7x7x576xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x11x11x576xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<5x5x576xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x7x7x576xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 11, 11, 576], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x11x11x576xf32>> -> tensor<1x11x11x576xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [5, 5, 576], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<5x5x576xf32>> -> tensor<5x5x576xf32>
%5 = tensor.empty() : tensor<1x7x7x576xf32>
@@ -196,20 +184,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @matmul_aarch_i8_i8_i32_static() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x1536xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x1536xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384xi8>> -> tensor<128x384xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1536], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>> -> tensor<384x1536xi8>
%5 = tensor.empty() : tensor<128x1536xi32>
@@ -227,12 +213,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @matmul_aarch_i8_i8_i32_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
@@ -240,9 +224,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1}
%6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%0, %2} -> tensor<?x?xi8>
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi8>>{%2, %1} -> tensor<?x?xi8>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?xi32>>{%0, %1} -> tensor<?x?xi32>
@@ -259,18 +243,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @pack() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x48x8x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x48x8x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<20x40xf32>> -> tensor<20x40xf32>
%3 = tensor.empty() : tensor<4x48x8x1xf32>
%pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32>
@@ -286,11 +268,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
@@ -304,8 +284,8 @@
%5 = arith.index_castui %1 : i32 to index
%6 = arith.index_castui %2 : i32 to index
%7 = arith.index_castui %3 : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5} -> tensor<?x?x32x16xi32>
%11 = tensor.empty(%6, %7) : tensor<?x?xi32>
%unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
@@ -321,21 +301,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @mmt4d_384x384x512_4x1x4_dispatch_0() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
%c96 = arith.constant 96 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<96x384x4x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<128x384x4x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<96x128x4x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<96x384x4x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<128x384x4x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<96x128x4x4xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [96, 384, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<96x384x4x1xf32>> -> tensor<96x384x4x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [128, 384, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384x4x1xf32>> -> tensor<128x384x4x1xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [96, 384, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<96x128x4x4xf32>> -> tensor<96x128x4x4xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir
index 11dc420..eb84c70 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sme_lowering_strategy.mlir
@@ -1,16 +1,14 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --iree-llvmcpu-enable-scalable-vectorization=true --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @transpose_f32() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf32>> -> tensor<32x32xf32>
%3 = tensor.empty() : tensor<32x32xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf32>) outs(%3 : tensor<32x32xf32>) {
@@ -30,17 +28,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @transpose_f64() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf64>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf64>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf64>> -> tensor<32x32xf64>
%3 = tensor.empty() : tensor<32x32xf64>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf64>) outs(%3 : tensor<32x32xf64>) {
@@ -60,17 +56,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @transpose_unsupported_not_rank_2() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x8x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x8x4xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 4, 8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x8xf32>> -> tensor<2x4x8xf32>
%3 = tensor.empty() : tensor<2x8x4xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<2x4x8xf32>) outs(%3 : tensor<2x8x4xf32>) {
@@ -90,17 +84,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @transpose_unsupported_not_simple_transpose() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf32>> -> tensor<32x32xf32>
%3 = tensor.empty() : tensor<32x32xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<32x32xf32>) outs(%3 : tensor<32x32xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir
index 139f196..757a039 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy.mlir
@@ -3,13 +3,11 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' \
// RUN: --iree-llvmcpu-enable-scalable-vectorization=true --split-input-file --iree-llvmcpu-disable-arm-sme-tiling %s | FileCheck %s --check-prefixes=CHECK,DISABLE-ARM-SME
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -18,10 +16,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -39,19 +37,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<15x14xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<14x7xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<15x14xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<14x7xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 14], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<15x14xf32>> -> tensor<15x14xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [14, 7], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<14x7xf32>> -> tensor<14x7xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [15, 7], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>> -> tensor<15x7xf32>
@@ -69,19 +65,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1xf32>> -> tensor<1x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1xf32>> -> tensor<1x1xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>> -> tensor<1x1xf32>
@@ -99,13 +93,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve,+sme", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -114,10 +106,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -142,12 +134,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {cpu = "", cpu_features = "+v9a,+sve", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", link_embedded = false, native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android34"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -165,11 +155,11 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x256xi8>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256xf32>>
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<1024x256xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x256xi8>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256xf32>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<1024x256xf32>>
%15 = flow.dispatch.tensor.load %10, offsets = [0, 0], sizes = [1024, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x256xi8>> -> tensor<1024x256xi8>
%16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
%17 = flow.dispatch.tensor.load %12, offsets = [0], sizes = [1024], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1024xf32>> -> tensor<1024xf32>
@@ -201,19 +191,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {cpu = "", cpu_features = "+v9a,+sve", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", link_embedded = false, native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android34"}>
func.func @depthwise_conv() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>> -> tensor<1x57x57x72xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>> -> tensor<3x3x72xf32>
%5 = tensor.empty() : tensor<1x28x28x72xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
index 02ad9c9..ec9241b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_sve_lowering_strategy_peeling.mlir
@@ -2,13 +2,11 @@
// RUN: --iree-llvmcpu-enable-scalable-vectorization=true --iree-llvmcpu-vector-pproc-strategy=peel \
// RUN: --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @matmul_tensors() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
@@ -17,10 +15,10 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%7 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%0, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2} -> tensor<?x?xf32>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1} -> tensor<?x?xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
@@ -38,19 +36,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @static_tensors_non_pow_two_sizes() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<15x14xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<14x7xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<15x14xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<14x7xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 14], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<15x14xf32>> -> tensor<15x14xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [14, 7], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<14x7xf32>> -> tensor<14x7xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [15, 7], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<15x7xf32>> -> tensor<15x7xf32>
@@ -68,19 +64,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu_features = "+sve", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-elf"}>
func.func @static_tensors_1x1() attributes {hal.executable.target = #executable_target_embedded_elf_arm_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1xf32>> -> tensor<1x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1xf32>> -> tensor<1x1xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>> -> tensor<1x1xf32>
@@ -99,19 +93,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
func.func @depthwise_conv() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x4xf32>>
%input = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 1, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x4x4xf32>> -> tensor<1x1x4x4xf32>
%filter = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 4, 4], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4x4xf32>> -> tensor<1x4x4xf32>
%5 = tensor.empty() : tensor<1x1x1x4xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
index fe4c802..3042c00 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --iree-llvmcpu-disable-distribution --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @matmul_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x128xf32>> -> tensor<512x128xf32>
%5 = tensor.empty() : tensor<384x128xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
index 07769ef..02095fc 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_32_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {cpu_features = "+m,+f", data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 16 : index, target_triple = "riscv32-none-elf"}>
func.func @matmul_riscv() attributes {hal.executable.target = #executable_target_embedded_elf_riscv_32_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x128xf32>> -> tensor<512x128xf32>
%5 = tensor.empty() : tensor<384x128xf32>
@@ -32,19 +30,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_riscv_32_ = #hal.executable.target<"llvm-cpu", "embedded-elf-riscv_32", {cpu_features = "+m,+f", data_layout = "e-m:e-p:32:32-i64:64-n32-S128", native_vector_size = 16 : index, target_triple = "riscv32-none-elf"}>
func.func @thin_depthwise_conv_static() attributes {hal.executable.target = #executable_target_embedded_elf_riscv_32_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>> -> tensor<1x57x57x72xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>> -> tensor<3x3x72xf32>
%5 = tensor.empty() : tensor<1x28x28x72xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index 2e21413..e67e7af 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt --pass-pipeline='builtin.module(iree-llvmcpu-select-lowering-strategy)' --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @matvec_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384xf32>> -> tensor<128x384xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<384xf32>> -> tensor<384xf32>
%5 = tensor.empty() : tensor<128xf32>
@@ -32,12 +30,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @matvec_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
@@ -49,9 +45,9 @@
%3 = arith.index_cast %0 : i32 to index
%4 = arith.index_cast %1 : i32 to index
%5 = arith.index_cast %2 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %4}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%5}
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%3, %4}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%5}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%3}
%9 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%10 = arith.index_cast %9 : i32 to index
%11 = flow.dispatch.tensor.load %8, offsets = [0], sizes = [%10], strides = [1] : !flow.dispatch.tensor<writeonly:tensor<?xf32>>{%3} -> tensor<?xf32>
@@ -72,20 +68,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @dot_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<384xf32>> -> tensor<384xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<384xf32>> -> tensor<384xf32>
%5 = tensor.empty() : tensor<f32>
@@ -104,12 +98,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @dot_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
@@ -119,10 +111,10 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_cast %0 : i32 to index
%3 = arith.index_cast %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%5 = flow.dispatch.tensor.load %4, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<writeonly:tensor<f32>> -> tensor<f32>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%2}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%2}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%3}
%8 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [%2], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%2} -> tensor<?xf32>
%9 = flow.dispatch.tensor.load %7, offsets = [0], sizes = [%3], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%3} -> tensor<?xf32>
%10 = linalg.fill ins(%cst : f32) outs(%5 : tensor<f32>) -> tensor<f32>
@@ -140,12 +132,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -154,9 +144,9 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%6 = flow.dispatch.tensor.load %3, offsets = [0], sizes = [%1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%1} -> tensor<?xf32>
%7 = tensor.empty(%0, %1) : tensor<?x?xf32>
@@ -177,12 +167,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
@@ -191,9 +179,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
%9 = tensor.empty(%0, %1, %2, %3) : tensor<?x?x?x?xf32>
@@ -215,18 +203,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @add_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x16x32x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x16x32x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x16x32x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x16x32x128xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 16, 32, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x16x32x128xf32>> -> tensor<64x16x32x128xf32>
%3 = tensor.empty() : tensor<64x16x32x128xf32>
%4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x16x32x128xf32>) outs(%3 : tensor<64x16x32x128xf32>) {
@@ -247,12 +233,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 0], [64, 64, 0], [0, 0, 0], [32, 32, 0], [0, 0, 32], [0, 0, 0]]>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert, {enable_loop_peeling}>
@@ -262,9 +246,9 @@
translation_info = #translation
} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x512xf32>> -> tensor<256x512xf32>
%5 = tensor.empty() : tensor<128x512xf32>
@@ -283,19 +267,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @matmul_partially_peel() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16641x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16641x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16641x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16641x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16641, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16641x16xf32>> -> tensor<16641x16xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x8xf32>> -> tensor<16x8xf32>
%5 = tensor.empty() : tensor<16641x8xf32>
@@ -314,11 +296,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -329,8 +309,8 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xi32>{%0, %1}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xi32>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xi32>{%0, %1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xi32>{%2, %3}
%subview = memref.subview %7[%4, %5] [%0, %1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset: ?>>
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%6 : memref<?x?xi32>) outs(%subview : memref<?x?xi32, strided<[?, 1], offset: ?>>) {
^bb0(%in: i32, %out: i32):
@@ -348,11 +328,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
@@ -360,8 +338,8 @@
%c2 = arith.constant 2 : index
%cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32>
%cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32>
@@ -379,11 +357,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @static_3d_fft_stage3() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
@@ -392,8 +368,8 @@
%cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32>
%0 = bufferization.to_memref %cst_0 : memref<4xf32>
%1 = bufferization.to_memref %cst : memref<4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32>
iree_linalg_ext.fft ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>)
return
}
@@ -407,12 +383,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -424,9 +398,9 @@
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %2}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %1}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<?x?xf32>>{%0, %1}
%6 = tensor.empty(%0, %1) : tensor<?x?xf32>
%7 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%6 : tensor<?x?xf32>) {
^bb0(%out: f32):
@@ -456,12 +430,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @conv_dynamic() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
@@ -474,9 +446,9 @@
%6 = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
%7 = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index
%8 = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%4, %5, %3, %6}
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xf32>>{%0, %7, %8, %6}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3}
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%4, %5, %3, %6}
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xf32>>{%0, %7, %8, %6}
%12 = flow.dispatch.tensor.load %9, offsets = [0, 0, 0, 0], sizes = [%0, %1, %2, %3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%0, %1, %2, %3} -> tensor<?x?x?x?xf32>
%13 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0, 0], sizes = [%4, %5, %3, %6], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xf32>>{%4, %5, %3, %6} -> tensor<?x?x?x?xf32>
%14 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [%0, %7, %8, %6], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xf32>>{%0, %7, %8, %6} -> tensor<?x?x?x?xf32>
@@ -494,20 +466,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c607520 = arith.constant 607520 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c607520) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>> -> tensor<3x3x3x16xf32>
%5 = tensor.empty() : tensor<1x112x112x16xf32>
@@ -525,20 +495,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @conv_nchw_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x128x30x30xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x128x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x128x28x28xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x128x30x30xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x128x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x128x28x28xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 128, 30, 30], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x128x30x30xf32>> -> tensor<1x128x30x30xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [128, 128, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x128x3x3xf32>> -> tensor<128x128x3x3xf32>
%5 = tensor.empty() : tensor<1x128x28x28xf32>
@@ -556,19 +524,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @depthwise_conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x161x161x240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x240xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x80x80x240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x161x161x240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x240xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x80x80x240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x161x161x240xf32>> -> tensor<1x161x161x240xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x240xf32>> -> tensor<3x3x240xf32>
%5 = tensor.empty() : tensor<1x80x80x240xf32>
@@ -587,19 +553,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu"}>
func.func @thin_depthwise_conv_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x72xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 161, 161, 240], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x57x57x72xf32>> -> tensor<1x57x57x72xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 240], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x72xf32>> -> tensor<3x3x72xf32>
%5 = tensor.empty() : tensor<1x28x28x72xf32>
@@ -618,19 +582,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "+mmx,+popcnt,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+avx,+avx2,+fma,+avx512f,+bmi,+bmi2,+aes,+pclmul,+avx512vl,+avx512bw,+avx512dq,+avx512cd,+avx512vnni,+adx,+clflushopt,+clwb,+cx16,+cx8,+crc32,+f16c,+fsgsbase,+fxsr,+invpcid,+lzcnt,+movbe,+pku,+prfchw,+rdrnd,+rdseed,+sahf,+x87,+xsave,+xsavec,+xsaveopt,+xsaves", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-none-elf", ukernels = false}>
func.func @pooling_nchw_max() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c3846080 = arith.constant 3846080 : index
%c0 = arith.constant 0 : index
%cst = arith.constant -3.40282347E+38 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c3846080) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x64x114x114xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x64x56x56xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c3846080) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x64x114x114xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x64x56x56xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 64, 114, 114], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x64x114x114xf32>> -> tensor<1x64x114x114xf32>
%3 = tensor.empty() : tensor<1x64x56x56xf32>
%4 = tensor.empty() : tensor<3x3xf32>
@@ -649,18 +611,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-pc-linux-gnu"}>
#map = affine_map<(d0, d1) -> (d1, d0)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
func.func @generic_static() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<96x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<96x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16x96xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [96, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<96x16xf32>> -> tensor<96x16xf32>
%3 = tensor.empty() : tensor<16x96xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<96x16xf32>) outs(%3 : tensor<16x96xf32>) {
@@ -679,19 +639,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @matmul_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x128xf32>> -> tensor<512x128xf32>
%5 = tensor.empty() : tensor<384x128xf32>
@@ -745,20 +703,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @matmul_i8_i8_i32_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x1536xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x1536xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384xi8>> -> tensor<128x384xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1536], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x1536xi8>> -> tensor<384x1536xi8>
%5 = tensor.empty() : tensor<128x1536xi32>
@@ -777,21 +733,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @gemm_unit_N() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x1xf32>>{%0}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x1xf32>>{%0}
%5 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [%1, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%1} -> tensor<?x1xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, %1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%0, %1} -> tensor<?x?xf32>
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%0, 1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x1xf32>>{%0} -> tensor<?x1xf32>
@@ -809,20 +763,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @gemm_unit_M_unit_N() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%0}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%0}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, %0], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?xf32>>{%0} -> tensor<1x?xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [%0, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x1xf32>>{%0} -> tensor<?x1xf32>
%6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [1, 1], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<1x1xf32>> -> tensor<1x1xf32>
@@ -840,22 +792,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @matmul_odd() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x49xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x49xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<33x49xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x49xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x49xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<33x49xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [33, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<33x16xf32>> -> tensor<33x16xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x49xf32>> -> tensor<16x49xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [33, 49], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<33x49xf32>> -> tensor<33x49xf32>
@@ -875,11 +825,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>
@@ -889,8 +837,8 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3}
%6 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [1, %0, 1, 1, %1, %2, 1, %3], strides = [1, 1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x?x1x1x?x?x1x?xf32>>{%0, %1, %2, %3} -> tensor<1x?x1x1x?x?x1x?xf32>
%7 = tensor.empty(%0, %1, %2, %3) : tensor<1x?x1x1x?x?x1x?xf32>
%8 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<1x?x1x1x?x?x1x?xf32>) outs(%7 : tensor<1x?x1x1x?x?x1x?xf32>) {
@@ -910,11 +858,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0) -> (d0)>
@@ -922,8 +868,8 @@
func.func @reduce_to_scalar_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
%3 = tensor.empty() : tensor<f32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<f32>) -> tensor<f32>
@@ -945,11 +891,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<(d0) -> (d0)>
@@ -957,8 +901,8 @@
func.func @reduce_to_scalar_dynamic() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<f32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<f32>>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [%0], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf32>>{%0} -> tensor<?xf32>
%4 = flow.dispatch.tensor.load %2, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readwrite:tensor<f32>> -> tensor<f32>
%5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%3 : tensor<?xf32>) outs(%4 : tensor<f32>) {
@@ -978,18 +922,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
#map = affine_map<() -> ()>
func.func @scalar() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<f32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<f32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<writeonly:tensor<f32>> -> tensor<f32>
%4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = []} ins(%2 : tensor<f32>) outs(%3 : tensor<f32>) {
@@ -1006,11 +948,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1, d0)>
@@ -1018,8 +958,8 @@
func.func @transpose_8x8() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) {
@@ -1035,11 +975,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx2,+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1, d0)>
@@ -1047,8 +985,8 @@
func.func @transpose_16x16() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<writeonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<512x1024xf32>) outs(%3 : tensor<1024x512xf32>) {
@@ -1064,12 +1002,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -1079,9 +1015,9 @@
%c6144 = arith.constant 6144 : index
%c792576 = arith.constant 792576 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c792576) : !flow.dispatch.tensor<writeonly:tensor<12x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c792576) : !flow.dispatch.tensor<writeonly:tensor<12x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128x128xf32>> -> tensor<12x128x128xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [12, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128xf32>> -> tensor<12x128xf32>
%5 = tensor.empty() : tensor<12x128xf32>
@@ -1116,18 +1052,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x48x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x48x16x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<20x40xf32>> -> tensor<20x40xf32>
%3 = tensor.empty() : tensor<2x48x16x1xf32>
%pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32>
@@ -1144,18 +1078,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @pack_f16() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x48x16x1xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<20x40xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x48x16x1xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<20x40xf16>> -> tensor<20x40xf16>
%3 = tensor.empty() : tensor<2x48x16x1xf16>
%pack = tensor.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16>
@@ -1172,17 +1104,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @pack_many_elements() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1200x500000xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<31250x1200x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1200x500000xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<31250x1200x16x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1200, 500000], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1200x500000xf32>> -> tensor<1200x500000xf32>
%3 = tensor.empty() : tensor<31250x1200x16x1xf32>
%pack = tensor.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32>
@@ -1199,12 +1129,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1)>
@@ -1213,9 +1141,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>> -> tensor<24x32x16x16xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf32>> -> tensor<512xf32>
%5 = tensor.empty() : tensor<24x512x16x1xf32>
@@ -1247,18 +1175,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @elem_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x384x8x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x384x8x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384xf32>> -> tensor<128x384xf32>
%3 = tensor.empty() : tensor<128x384xf32>
%4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<128x384xf32>) outs(%3 : tensor<128x384xf32>) {
@@ -1284,11 +1210,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}>
#map = affine_map<(d0, d1) -> (d1, d0)>
@@ -1297,8 +1221,8 @@
%c1579008 = arith.constant 1579008 : index
%c3147776 = arith.constant 3147776 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c1579008) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<30522x768xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3147776) : !flow.dispatch.tensor<writeonly:tensor<1908x768x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c1579008) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<30522x768xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3147776) : !flow.dispatch.tensor<writeonly:tensor<1908x768x16x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [30522, 768], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<30522x768xf32>> -> tensor<30522x768xf32>
%3 = tensor.empty() : tensor<768x30522xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<30522x768xf32>) outs(%3 : tensor<768x30522xf32>) {
@@ -1323,14 +1247,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -1341,11 +1263,11 @@
%cst = arith.constant -0.000000e+00 : f32
%cst_0 = arith.constant 1.024000e+03 : f32
%cst_1 = arith.constant 9.99999996E-13 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x1024x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<24x1024x16x1xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>> -> tensor<384x1024xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<384xf32>> -> tensor<384xf32>
%7 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [1024], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1024xf32>> -> tensor<1024xf32>
@@ -1396,12 +1318,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx2", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf", ukernels = false}>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -1409,9 +1329,9 @@
func.func @reduction_pack() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x24x16x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x24x16x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [384, 1024, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<384x1024x32xf32>> -> tensor<384x1024x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [384, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x1024xf32>> -> tensor<384x1024xf32>
%5 = tensor.empty() : tensor<1024x24x16x1xf32>
@@ -1445,18 +1365,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @unpack_static() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c41943040 = arith.constant 41943040 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c41943040) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x256x16x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c41943040) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x256x16x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x4096xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 256, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x256x16x16xf32>> -> tensor<64x256x16x16xf32>
%3 = tensor.empty() : tensor<1024x4096xf32>
%unpack = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32>
@@ -1473,12 +1391,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0)>
@@ -1486,9 +1402,9 @@
#map2 = affine_map<(d0, d1) -> (d0, d1)>
func.func @unpack_elem() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<48x64x8x2xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<48x64x8x2xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [48, 64, 8, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<48x64x8x2xf32>> -> tensor<48x64x8x2xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
%5 = tensor.empty() : tensor<128x384xf32>
@@ -1512,13 +1428,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d1)>
@@ -1529,10 +1443,10 @@
%c-128_i32 = arith.constant -128 : i32
%c127_i32 = arith.constant 127 : i32
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2304x24xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<24x144xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<144xi32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2304x144xi8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2304x24xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<24x144xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<144xi32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2304x144xi8>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2304, 24], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2304x24xi8>> -> tensor<2304x24xi8>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [24, 144], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<24x144xi8>> -> tensor<24x144xi8>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [144], strides = [1] : !flow.dispatch.tensor<readonly:tensor<144xi32>> -> tensor<144xi32>
@@ -1560,19 +1474,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = false}>
func.func @test() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64
%c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i64>> -> tensor<i64>
%extracted = tensor.extract %2[] : tensor<i64>
%3 = arith.muli %extracted, %c6364136223846793005_i64 : i64
@@ -1588,12 +1500,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_system_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "system-elf-x86_64", {cpu = "cascadelake", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", link_embedded = false, native_vector_size = 64 : index, target_triple = "x86_64-unknown-linux-gnu", ukernels = false}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -1601,9 +1511,9 @@
func.func @non_trivial_program() attributes {hal.executable.target = #executable_target_system_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x1x128x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x1x128x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [128, 1, 128, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x1x128x1xf32>> -> tensor<128x1x128x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x1xf32>> -> tensor<128x1xf32>
%5 = tensor.empty() : tensor<1x1xf32>
@@ -1631,12 +1541,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 32 : index, target_triple = "x86_64-unknown-unknown-eabi-elf", ukernels = true}>
func.func @batch_mmt4d() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
@@ -1657,9 +1565,9 @@
%11 = arith.shli %10, %c32_i64 : i64
%12 = arith.ori %9, %11 : i64
%13 = arith.index_castui %12 : i64 to index
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x10x32x8x1xf32>>
- %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32x4x1xf32>>
- %16 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%13) : !flow.dispatch.tensor<writeonly:tensor<128x10x80x8x4xf32>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x10x32x8x1xf32>>
+ %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32x4x1xf32>>
+ %16 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%13) : !flow.dispatch.tensor<writeonly:tensor<128x10x80x8x4xf32>>
%17 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 32, 8, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x10x32x8x1xf32>> -> tensor<128x10x32x8x1xf32>
%18 = flow.dispatch.tensor.load %15, offsets = [0, 0, 0, 0, 0], sizes = [128, 80, 32, 4, 1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x80x32x4x1xf32>> -> tensor<128x80x32x4x1xf32>
%19 = tensor.empty() : tensor<128x10x80x8x4xf32>
@@ -1676,20 +1584,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "cascadelake", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
func.func @mmt4d_with_large_reduction() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x18176x16x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<284x18176x16x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<7x284x16x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x18176x16x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<284x18176x16x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<7x284x16x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [7, 18176, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<7x18176x16x1xf32>> -> tensor<7x18176x16x1xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [284, 18176, 16, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<284x18176x16x1xf32>> -> tensor<284x18176x16x1xf32>
%5 = tensor.empty() : tensor<7x284x16x16xf32>
@@ -1706,19 +1612,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @pad_only() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c634816 = arith.constant 634816 : index
%c3846080 = arith.constant 3846080 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor<writeonly:tensor<1x114x114x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c634816) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3846080) : !flow.dispatch.tensor<writeonly:tensor<1x114x114x64xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x112x112x64xf32>> -> tensor<1x112x112x64xf32>
%padded = tensor.pad %2 low[0, 1, 1, 0] high[0, 1, 1, 0] {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index):
@@ -1738,11 +1642,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
cpu = "generic", cpu_features = "",
@@ -1750,8 +1652,8 @@
native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @winograd_output_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>> -> tensor<8x8x2x6x6x128xf16>
%3 = tensor.empty() : tensor<2x36x36x128xf16>
%4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16>
@@ -1767,11 +1669,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
cpu = "generic", cpu_features = "",
@@ -1779,8 +1679,8 @@
native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @winograd_input_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>> -> tensor<2x34x34x128xf16>
%3 = tensor.empty() : tensor<8x8x2x6x6x128xf16>
%4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16>
@@ -1796,11 +1696,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
cpu = "generic", cpu_features = "",
@@ -1808,8 +1706,8 @@
native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @winograd_filter_transform() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>> -> tensor<3x3x64x128xf32>
%3 = tensor.empty() : tensor<8x8x64x128xf32>
%4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32>
@@ -1825,13 +1723,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
cpu = "generic", cpu_features = "",
@@ -1840,10 +1736,10 @@
func.func @attention() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
%scale = arith.constant 0.125 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
@@ -1866,13 +1762,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {
cpu = "generic", cpu_features = "",
@@ -1880,10 +1774,10 @@
native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
func.func @elementwise_output_transposed() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<768xi64>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32xi64>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32x768xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<768xi64>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32xi64>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32x768xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i64>> -> tensor<i64>
%5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [768], strides = [1] : !flow.dispatch.tensor<readonly:tensor<768xi64>> -> tensor<768xi64>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readonly:tensor<32xi64>> -> tensor<32xi64>
@@ -1916,9 +1810,9 @@
module {
func.func @test_mod_vectorizing_strategy_peeling() attributes {hal.executable.target = #executable_target_system_elf_x86_64_}{
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<6xi32>>
- %1 = hal.interface.binding.subspan layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<6xi32>>
- %2 = hal.interface.binding.subspan layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>], flags = Indirect>]>) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<6xi32>>
+ %0 = hal.interface.binding.subspan layout(#hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer>]>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<6xi32>>
+ %1 = hal.interface.binding.subspan layout(#hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer>]>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<6xi32>>
+ %2 = hal.interface.binding.subspan layout(#hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer>]>) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<6xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor<readonly:tensor<6xi32>> -> tensor<6xi32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [6], strides = [1] : !flow.dispatch.tensor<readonly:tensor<6xi32>> -> tensor<6xi32>
%5 = tensor.empty() : tensor<6xi32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir
index 284dd8c..44c0292 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile_and_fuse.mlir
@@ -101,12 +101,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @shared_out_operand() {
%cst = arith.constant 0.000000e+00 : f32
@@ -117,10 +115,10 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_castui %0 {stream.alignment = 1024 : index, stream.values = [205824 : index, 795648 : index, 1385472 : index, 1975296 : index, 2565120 : index, 3154944 : index, 3744768 : index]} : i32 to index
%3 = arith.index_castui %1 {stream.alignment = 1024 : index, stream.values = [0 : index, 3072 : index, 6144 : index, 9216 : index, 12288 : index, 15360 : index, 18432 : index]} : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<391x384xf32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c600576) : !flow.dispatch.tensor<writeonly:tensor<391x384xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<391x384xf32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384x384xf32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<384xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c600576) : !flow.dispatch.tensor<writeonly:tensor<391x384xf32>>
%8 = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [391, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<391x384xf32>> -> tensor<391x384xf32>
%9 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [384, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x384xf32>> -> tensor<384x384xf32>
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<384xf32>> -> tensor<384xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
index 2c8388b..a2944d9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt %s --iree-transform-dialect-interpreter --transform-dialect-drop-schedule | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
func.func @pad_matmul_static_dispatch_0() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<250x500xf32>> -> tensor<250x500xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>> -> tensor<500x1020xf32>
%5 = tensor.empty() : tensor<250x1020xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir
index 71adee3..6f6ab10 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vector_lowering.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-llvmcpu-vector-lowering-pipeline))" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_391x384x384_f32() {
%cst = arith.constant 0.000000e+00 : f32
@@ -19,13 +17,13 @@
%cst_0 = arith.constant dense<0.000000e+00> : vector<8x32xf32>
%cst_1 = arith.constant dense<6.000000e+00> : vector<8x32xf32>
%alloca = memref.alloca() {alignment = 64 : i64} : memref<8x32xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32>
memref.assume_alignment %0, 64 : memref<391x384xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32>
memref.assume_alignment %1, 64 : memref<384x384xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32>
memref.assume_alignment %2, 64 : memref<384xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<391x384xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<391x384xf32>
memref.assume_alignment %3, 64 : memref<391x384xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -83,13 +81,11 @@
// Check that vector.loads whose elements are extracted and
// consumed in a scalar fashion are scalarized.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_scalar_loads() {
%cst = arith.constant 0.000000e+00 : f32
@@ -102,13 +98,13 @@
%cst_0 = arith.constant dense<0.000000e+00> : vector<8x32xf32>
%cst_1 = arith.constant dense<6.000000e+00> : vector<8x32xf32>
%alloca = memref.alloca() {alignment = 64 : i64} : memref<8x32xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<391x384xf32>
memref.assume_alignment %0, 64 : memref<391x384xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<384x384xf32>
memref.assume_alignment %1, 64 : memref<384x384xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : memref<384xf32>
memref.assume_alignment %2, 64 : memref<384xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<391x384xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<391x384xf32>
memref.assume_alignment %3, 64 : memref<391x384xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -146,16 +142,14 @@
// Make sure we don't transpose a mask but create a transposed mask instead.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transpose_mask() {
%a = arith.constant 4 : index
%b = arith.constant 8 : index
%c0 = arith.constant 0 : index
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<4x2xi1>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<4x2xi1>
%mask = vector.create_mask %a, %b : vector<2x4xi1>
%transpose_mask = vector.transpose %mask, [1, 0] : vector<2x4xi1> to vector<4x2xi1>
vector.transfer_write %transpose_mask, %3[%c0, %c0] {in_bounds = [true, true]} : vector<4x2xi1>, memref<4x2xi1>
@@ -174,12 +168,10 @@
// Make sure that the gather patterns get rid of vector.gather over strided
// memref.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @gather_strided_memref() {
%cst = arith.constant dense<0.000000e+00> : vector<4xf32>
@@ -187,11 +179,11 @@
%c0_i32 = arith.constant 0 : i32
%c4 = arith.constant 4 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<2592000x3xf32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<2592000x3xf32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<2592000x3xf32, #hal.descriptor_type<storage_buffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<518400xi32, #hal.descriptor_type<storage_buffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<518400xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %1, 64 : memref<518400xi32, #hal.descriptor_type<storage_buffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<518400xf32, #hal.descriptor_type<storage_buffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<518400xf32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %2, 64 : memref<518400xf32, #hal.descriptor_type<storage_buffer>>
%subview = memref.subview %0[0, 0] [2592000, 1] [1, 1] : memref<2592000x3xf32, #hal.descriptor_type<storage_buffer>> to memref<2592000xf32, strided<[3]>, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir
index 2e9b3c4..6d5e65a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/vectorize_with_masking_and_hoist.mlir
@@ -34,12 +34,10 @@
// CHECK-NEXT: %[[INSERT_SLICE:.*]] = tensor.insert_slice %[[OUT_WRITE]] into %[[OUT_SLICE]]{{.*}} : tensor<8x?xf32> into tensor<8x?xf32>
// CHECK-NEXT: tensor.insert_slice %[[INSERT_SLICE]] into %[[OUT_TENSOR_1]]{{.*}} : tensor<8x?xf32> into tensor<1024x1024xf32>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pipeline() {
%c1 = arith.constant 1 : index
@@ -47,9 +45,9 @@
%c16 = arith.constant 16 : index
%c8 = arith.constant 8 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<1024x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir
index 5d7e055..9aa61c2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/verify_linalg_transform_legality.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmcpu-verify-linalg-transform-legality))" %s --verify-diagnostics -split-input-file
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_123x456xf32_times_456x789xf32_into_123x789xf32_dispatch_0() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<123x4x114xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x114x789xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x123x789xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<123x4x114xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x114x789xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x123x789xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [123, 4, 114], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<123x4x114xf32>> -> tensor<123x4x114xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 114, 789], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x114x789xf32>> -> tensor<4x114x789xf32>
%5 = tensor.empty() : tensor<4x123x789xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
index a111245..e5f1149 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp
@@ -202,8 +202,6 @@
}
};
-using SetBinding = std::pair<APInt, APInt>;
-
/// Convention with the HAL side to pass kernel arguments.
/// The bindings are ordered based on binding set and binding index then
/// compressed and mapped to dense set of arguments.
@@ -211,21 +209,16 @@
/// InterfaceBindingOp and kernel argument index.
/// For instance if the kernel has (set, bindings) A(0, 1), B(1, 5), C(0, 6) it
/// will return the mapping [A, 0], [C, 1], [B, 2]
-static llvm::SmallDenseMap<SetBinding, size_t>
+static llvm::SmallDenseMap<APInt, size_t>
getKernelArgMapping(Operation *funcOp) {
- llvm::SetVector<SetBinding> usedBindingSet;
+ llvm::SetVector<APInt> usedBindingSet;
funcOp->walk([&](IREE::HAL::InterfaceBindingSubspanOp subspanOp) {
- usedBindingSet.insert(
- SetBinding(subspanOp.getSet(), subspanOp.getBinding()));
+ usedBindingSet.insert(subspanOp.getBinding());
});
auto sparseBindings = usedBindingSet.takeVector();
std::sort(sparseBindings.begin(), sparseBindings.end(),
- [](SetBinding lhs, SetBinding rhs) {
- if (lhs.first == rhs.first)
- return lhs.second.ult(rhs.second);
- return lhs.first.ult(rhs.first);
- });
- llvm::SmallDenseMap<SetBinding, size_t> mapBindingArgIndex;
+ [](APInt lhs, APInt rhs) { return lhs.ult(rhs); });
+ llvm::SmallDenseMap<APInt, size_t> mapBindingArgIndex;
for (auto [index, binding] : llvm::enumerate(sparseBindings)) {
mapBindingArgIndex[binding] = index;
}
@@ -263,8 +256,7 @@
} else {
llvmType = LLVM::LLVMPointerType::get(rewriter.getContext());
}
- llvmInputTypes[argMapping[SetBinding(subspanOp.getSet(),
- subspanOp.getBinding())]] = llvmType;
+ llvmInputTypes[argMapping[subspanOp.getBinding()]] = llvmType;
});
// As a convention with HAL, push constants are appended as kernel arguments
// after all the binding inputs.
@@ -353,8 +345,8 @@
operands, op->getAttrDictionary());
MemRefType memrefType =
llvm::dyn_cast<MemRefType>(subspanOp.getResult().getType());
- mlir::BlockArgument llvmBufferArg = llvmFuncOp.getArgument(
- argMapping[SetBinding(subspanOp.getSet(), subspanOp.getBinding())]);
+ mlir::BlockArgument llvmBufferArg =
+ llvmFuncOp.getArgument(argMapping[subspanOp.getBinding()]);
// As a convention with HAL all the kernel argument pointers are 16Bytes
// aligned.
llvmFuncOp.setArgAttr(llvmBufferArg.getArgNumber(),
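
Note on the ConvertToLLVM.cpp change above: with descriptor sets gone, getKernelArgMapping orders kernel arguments by binding ordinal alone. Below is a minimal standalone sketch of that convention, using plain STL containers and a hypothetical mapBindingsToKernelArgs helper rather than the LLVM ADT types in the patch: the sparse binding ordinals referenced by hal.interface.binding.subspan ops are deduplicated, sorted, and compressed into a dense kernel-argument index space.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

// Hypothetical helper: given the binding ordinals referenced by a kernel's
// hal.interface.binding.subspan ops (possibly out of order, possibly
// repeated), return ordinal -> dense kernel-argument index.
std::map<uint64_t, std::size_t>
mapBindingsToKernelArgs(std::vector<uint64_t> usedBindings) {
  // Sort ascending and deduplicate, mirroring the SetVector + std::sort pair
  // in getKernelArgMapping (now comparing only the binding ordinal).
  std::sort(usedBindings.begin(), usedBindings.end());
  usedBindings.erase(std::unique(usedBindings.begin(), usedBindings.end()),
                     usedBindings.end());
  std::map<uint64_t, std::size_t> argIndex;
  for (std::size_t i = 0; i < usedBindings.size(); ++i)
    argIndex[usedBindings[i]] = i;
  // Per the HAL convention quoted above, push constants would be appended as
  // kernel arguments after these binding arguments (not shown here).
  return argIndex;
}

int main() {
  // Bindings 6, 1, 5 referenced out of order map to arguments 2, 0, 1.
  auto mapping = mapBindingsToKernelArgs({6, 1, 5, 1});
  return (mapping[1] == 0 && mapping[5] == 1 && mapping[6] == 2) ? 0 : 1;
}
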
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
index 0d361fe..ac3e7ee 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
@@ -165,7 +165,7 @@
func.func @foo() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32>
%1 = gpu.thread_id x
%2 = arith.cmpi ult, %1, %c1 : index
scf.if %2 {
@@ -186,7 +186,7 @@
%c4 = arith.constant 4 : index
%c32 = arith.constant 32 : index
%cst = arith.constant dense<1.000000e+00> : vector<128xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32>
%1 = gpu.thread_id x
%2 = arith.cmpi ult, %1, %c32 : index
// Single-warp guard filters out threads 32-63.
@@ -266,7 +266,7 @@
%c4 = arith.constant 4 : index
%c32 = arith.constant 32 : index
%cst = arith.constant dense<1.000000e+00> : vector<128xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32>
%1 = gpu.thread_id x
%2 = arith.cmpi ult, %1, %c32 : index
// Single-warp guard filters out threads 32-63.
@@ -290,7 +290,7 @@
%c4 = arith.constant 4 : index
%c32 = arith.constant 32 : index
%cst = arith.constant dense<1.000000e+00> : vector<128xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128xf32>
%1 = gpu.thread_id x
%2 = arith.cmpi ult, %1, %c32 : index
// Single-warp guard filters out threads 32-63.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
index b7ca495..8cb1b2e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_user_vector_distribute.mlir
@@ -16,12 +16,10 @@
// OPT-IN: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
// OPT-IN-SAME: mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
// OPT-IN-SAME: no_reduce_shared_memory_bank_conflicts
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @main_0_dispatch_0 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -54,9 +52,9 @@
}>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
@@ -92,12 +90,10 @@
// OPT-IN: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
// OPT-IN-SAME: mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
// OPT-IN-SAME: reorder_workgroups = "transpose"
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @main_0_dispatch_0 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -131,9 +127,9 @@
}>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
@@ -164,12 +160,10 @@
// OPT-OUT: #[[$TRANSLATION:.+]] = #iree_codegen.translation_info<LLVMGPUVectorDistribute workgroup_size = [128, 2, 1] subgroup_size = 64
// OPT-OUT-SAME: mma_schedule = #iree_gpu.mma_schedule<intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
// OPT-OUT-SAME: reorder_workgroups = "none"
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @main_0_dispatch_0 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -192,9 +186,9 @@
}>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir
index 3450851..710168c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir
@@ -13,12 +13,10 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 4
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d4)>
@@ -26,9 +24,9 @@
func.func @expanded_matmul_transpose_b() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x2048xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x64x2048xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x10x64x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x2048xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x64x2048xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x10x64x64xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 64, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64x2048xf16>> -> tensor<2x64x2048xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 64, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x64x2048xf16>> -> tensor<10x64x2048xf16>
%5 = tensor.empty() : tensor<2x10x64x64xf16>
@@ -54,19 +52,17 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_nhwc() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x256x512x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x256x512x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 258, 514, 768], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>> -> tensor<2x258x514x768xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 768, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>> -> tensor<3x3x768x256xf16>
%5 = tensor.empty() : tensor<2x256x512x256xf32>
@@ -81,12 +77,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#target = #iree_gpu.target<arch = "gfx940", features = "", wgp = <
compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8,
@@ -99,9 +93,9 @@
func.func @matmul_256x256x256() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%5 = tensor.empty() : tensor<256x256xf32>
@@ -123,19 +117,17 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @mfma_matmul_1024x1024x1024() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>> -> tensor<1024x1024xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>> -> tensor<1024x1024xf16>
%5 = tensor.empty() : tensor<1024x1024xf32>
@@ -156,12 +148,10 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 2, subgroup_n_count = 2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 1, 32, 0, 1, 1, 1, 0]]>
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d5, d2 + d6, d3 + d7, d8)>
@@ -171,9 +161,9 @@
func.func @conv_nchwc() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x20x34x34x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x20x3x3x160x64xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x8x32x32x160xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x20x34x34x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x20x3x3x160x64xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x8x32x32x160xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [2, 20, 34, 34, 64], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x20x34x34x64xf16>> -> tensor<2x20x34x34x64xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 20, 3, 3, 160, 64], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x20x3x3x160x64xf16>> -> tensor<8x20x3x3x160x64xf16>
%5 = tensor.empty() : tensor<2x8x32x32x160xf32>
@@ -207,19 +197,17 @@
// WMMA-SAME: intrinsic = #iree_gpu.mma_layout<WMMA_F32_16x16x16_F16>
// WMMA-SAME: subgroup_m_count = 2, subgroup_n_count = 2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @wmma_matmul_1024x1024x1024() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>> -> tensor<1024x1024xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x1024xf16>> -> tensor<1024x1024xf16>
%5 = tensor.empty() : tensor<1024x1024xf32>
@@ -240,19 +228,17 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 1
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unaligned_mk_batch_matmul() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>> -> tensor<64x968x1281xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 1281, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>> -> tensor<64x1281x1281xf16>
%5 = tensor.empty() : tensor<64x968x1281xf16>
@@ -273,19 +259,17 @@
// CHECK-SAME: intrinsic = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>
// CHECK-SAME: subgroup_m_count = 1, subgroup_n_count = 4
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unaligned_m_batch_matmul_64x72x1280x1280() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x72x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1280x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x72x1280xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x72x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1280x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x72x1280xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 72, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x72x1280xf16>> -> tensor<64x72x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 1280, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280x1280xf16>> -> tensor<64x1280x1280xf16>
%5 = tensor.empty() : tensor<64x72x1280xf16>
@@ -300,19 +284,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @narrow_n_batch_matmul_64x968x4x320_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x320xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x320x4xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x4xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x320xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x320x4xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x4xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 320], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x968x320xf16>> -> tensor<64x968x320xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 320, 4], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x320x4xf16>> -> tensor<64x320x4xf16>
%5 = tensor.empty() : tensor<64x968x4xf16>
@@ -327,12 +309,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_dynamic_dim() {
%c0 = arith.constant 0 : index
@@ -345,10 +325,10 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x256xf16>>{%8}
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x256xf32>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x256xf16>>{%8}
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x256xf32>>{%8}
%11 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [%8, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x256xf16>>{%8} -> tensor<?x256xf16>
%12 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%13 = tensor.empty(%8) : tensor<?x256xf32>
@@ -369,21 +349,19 @@
// CHECK-LABEL: func.func @attention_20x4096x64x4096x64()
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @attention_20x4096x64x4096x64() {
%cst = arith.constant 1.250000e-01 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir
index 5c0ba99..41253b9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/lowering_scalar_dispatch.mlir
@@ -2,7 +2,10 @@
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable @scalar_dispatch {
hal.executable.variant public @rocm_hsaco_fb target(#executable_target_rocm_hsaco_fb) {
@@ -16,8 +19,8 @@
%c0 = arith.constant 0 : index
%c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64
%c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i64>> -> tensor<i64>
%extracted = tensor.extract %2[] : tensor<i64>
%3 = arith.muli %extracted, %c6364136223846793005_i64 : i64
@@ -32,8 +35,8 @@
// CHECK-LABEL: func.func @scalar_dispatch()
// CHECK-SAME: translation_info = #iree_codegen.translation_info<LLVMGPUBaseLowering workgroup_size = [1, 1, 1]>
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: memref.load %[[SPAN0]][] : memref<i64, #hal.descriptor_type<storage_buffer>>
// CHECK: arith.muli {{.+}} : i64
// CHECK: arith.addi {{.+}} : i64
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
index c7d242b..f4a7238 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 \
// RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{workgroup = [64, 64, 0], reduction = [0, 0, 4], thread = [8, 4]}>
hal.executable public @main {
@@ -21,9 +19,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 1, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
@@ -42,9 +40,9 @@
// analysis should be able to simplify the below to just two barriers.
// CHECK-LABEL: func @matmul_transpose_b
-// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: memref.alloc() : memref<64x8xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: memref.alloc() : memref<64x8xf16, #gpu.address_space<workgroup>>
// CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c1280 step %c4 {{.*}} -> (vector<8x4xf32>)
@@ -65,12 +63,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{workgroup = [64, 64, 0], reduction = [0, 0, 2], subgroup = [2, 2], mma_kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>}>
hal.executable public @main {
@@ -85,9 +81,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 2, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
@@ -103,9 +99,9 @@
}
// CHECK-LABEL: func @matmul_transpose_b_mfma
-// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space<workgroup>>
// CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c80 step %c2 {{.*}} -> (vector<2x2x4x1xf32>)
@@ -129,12 +125,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer, ReadOnly>,
- #hal.descriptor_set.binding<1, storage_buffer, ReadOnly>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{workgroup = [1, 64, 64, 0], reduction = [0, 0, 0, 2], subgroup = [1, 2, 2], mma_kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>}>
hal.executable private @main {
@@ -148,9 +142,9 @@
func.func @conv_igemm_im2col() attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 2, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x1280x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x16x16x1280xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x1280x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x16x16x1280xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x34x34x1280xf16>> -> tensor<2x34x34x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 1280, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x1280x1280xf16>> -> tensor<3x3x1280x1280xf16>
%5 = tensor.empty() : tensor<2x16x16x1280xf32>
@@ -187,9 +181,9 @@
}
// CHECK-LABEL: func @conv_igemm_im2col
-// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: memref.alloc() : memref<1x64x36xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: memref.alloc() : memref<32x68xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -217,12 +211,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{
workgroup = [64, 64, 0],
@@ -241,9 +233,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [64, 2, 1] subgroup_size = 32>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280xf16>> -> tensor<2048x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280xf16>> -> tensor<10240x1280xf16>
%5 = tensor.empty() : tensor<2048x10240xf32>
@@ -259,9 +251,9 @@
}
// CHECK-LABEL: func @matmul_transpose_b_wmma
-// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[B0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[B1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[B2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: memref.alloc() : memref<64x36xf16, #gpu.address_space<workgroup>>
// CHECK: %[[LOOP:.+]] = scf.for %[[IV:.+]] = %c0 to %c80 step %c2 {{.*}} -> (vector<2x2x8x1x1xf32>)
@@ -285,12 +277,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{
workgroup = [64, 64, 0],
@@ -313,9 +303,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 2, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>> -> tensor<2048x1280x!eltype>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>> -> tensor<10240x1280x!eltype>
%5 = tensor.empty() : tensor<2048x10240x!aeltype>
@@ -339,12 +329,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{
workgroup = [64, 64, 0],
@@ -367,9 +355,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 2, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>> -> tensor<2048x1280x!eltype>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>> -> tensor<10240x1280x!eltype>
%5 = tensor.empty() : tensor<2048x10240x!aeltype>
@@ -393,12 +381,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{
workgroup = [64, 64, 0],
@@ -421,9 +407,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [128, 2, 1] subgroup_size = 64>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>> -> tensor<2048x1280x!eltype>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>> -> tensor<10240x1280x!eltype>
%5 = tensor.empty() : tensor<2048x10240x!aeltype>
@@ -447,12 +433,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_gpu.lowering_config<{
workgroup = [64, 64, 0],
@@ -475,9 +459,9 @@
attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [64, 2, 1] subgroup_size = 32>} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x10240x!aeltype>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x1280x!eltype>> -> tensor<2048x1280x!eltype>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [10240, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<10240x1280x!eltype>> -> tensor<10240x1280x!eltype>
%5 = tensor.empty() : tensor<2048x10240x!aeltype>
@@ -509,19 +493,15 @@
#translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [8, 4, 1] subgroup_size = 32>
-#pipeline_layout = #hal.pipeline.layout<
- push_constants = 0,
- sets = [
- <0, bindings = [
- <0, storage_buffer, "ReadOnly|Indirect">,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer, Indirect>
- ], flags = Indirect>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, Indirect>
+]>
hal.executable public @main {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
- hal.executable.export public @conv_nchw_fused ordinal(0) layout(#pipeline_layout) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} {
+ hal.executable.export public @conv_nchw_fused ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
@@ -531,9 +511,9 @@
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant dense<1.0> : tensor<1x64xf32>
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x64x58x58xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x64x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x64x56x56xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x64x58x58xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x64x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x64x56x56xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 64, 58, 58], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x64x58x58xf32>> -> tensor<1x64x58x58xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [64, 64, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x64x3x3xf32>> -> tensor<64x64x3x3xf32>
%5 = tensor.empty() : tensor<1x64x56x56xf32>
@@ -574,19 +554,15 @@
#translation_info = #iree_codegen.translation_info<LLVMGPUTileAndFuse workgroup_size = [8, 4, 1] subgroup_size = 32>
-#pipeline_layout = #hal.pipeline.layout<
- push_constants = 0,
- sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, "ReadOnly|Indirect">,
- <2, storage_buffer, Indirect>
- ], flags = Indirect>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, Indirect>
+]>
hal.executable public @main {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
- hal.executable.export public @skinny_matmul_config ordinal(0) layout(#pipeline_layout) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} {
+ hal.executable.export public @skinny_matmul_config ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
@@ -598,10 +574,10 @@
%c111444672 = arith.constant 111444672 : index
%c4014080 = arith.constant 4014080 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c102227904) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c4014080) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<256x3136xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c111444672) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<128x3136xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c102227904) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c4014080) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<256x3136xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c111444672) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<128x3136xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 3136], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x3136xf32>> -> tensor<256x3136xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir
index d8f9673..1b7c818 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute.mlir
@@ -12,12 +12,10 @@
// to be migrated to the rocdl heuristics, but for now is just physically
// located here.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_f16_f32 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -30,9 +28,9 @@
func.func @matmul_256x256x256_f16_f32() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%5 = tensor.empty() : tensor<256x256xf32>
@@ -63,12 +61,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_f16_f16 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -81,9 +77,9 @@
func.func @matmul_256x256x256_f16_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%5 = tensor.empty() : tensor<256x256xf16>
@@ -112,12 +108,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @expanded_matmul_transpose_b_executable {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -130,11 +124,11 @@
func.func @expanded_matmul_transpose_b() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<2x64x2048xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<10x64x2048xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<2x10x64x64xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 64, 2048], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<2x64x2048xf16>> -> tensor<2x64x2048xf16>
@@ -184,12 +178,10 @@
// Basic f8, f8 -> f32 matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_f8_f32 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -202,9 +194,9 @@
func.func @matmul_256x256x256_f8_f32() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>> -> tensor<256x256xf8E4M3FNUZ>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf8E4M3FNUZ>> -> tensor<256x256xf8E4M3FNUZ>
%5 = tensor.empty() : tensor<256x256xf32>
@@ -235,12 +227,10 @@
// Basic i8, i8 -> i32 matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_i8_i32 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -253,9 +243,9 @@
func.func @matmul_256x256x256_i8_i32() {
%cst = arith.constant 0 : i32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
%5 = tensor.empty() : tensor<256x256xi32>
@@ -286,12 +276,10 @@
// Basic i8, i8 -> i32 matmul_transpose_b.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_transpose_b_256x256x256_i8_i32 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -304,9 +292,9 @@
func.func @matmul_transpose_b_256x256x256_i8_i32() {
%cst = arith.constant 0 : i32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
%5 = tensor.empty() : tensor<256x256xi32>
@@ -335,12 +323,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @conv_nhwc_dispatch_0 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -353,9 +339,9 @@
func.func @conv_nhwc() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x256x512x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x256x512x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 258, 514, 768], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x258x514x768xf16>> -> tensor<2x258x514x768xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 768, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x768x256xf16>> -> tensor<3x3x768x256xf16>
%5 = tensor.empty() : tensor<2x256x512x256xf32>
@@ -377,12 +363,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>
@@ -403,9 +387,9 @@
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = arith.index_castui %0 : i32 to index
%3 = arith.index_castui %1 : i32 to index
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1024x1280xf16>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x64x1280xf16>>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%3) : !flow.dispatch.tensor<writeonly:tensor<2x1024x20x64xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%2) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1024x1280xf16>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x64x1280xf16>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%3) : !flow.dispatch.tensor<writeonly:tensor<2x1024x20x64xf16>>
%7 = flow.dispatch.tensor.load %4, offsets = [0, 0, 0], sizes = [2, 1024, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1024x1280xf16>> -> tensor<2x1024x1280xf16>
%8 = flow.dispatch.tensor.load %5, offsets = [0, 0, 0], sizes = [20, 64, 1280], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x64x1280xf16>> -> tensor<20x64x1280xf16>
%9 = tensor.empty() : tensor<2x1024x20x64xf16>
@@ -446,12 +430,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_f16_f32 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -464,9 +446,9 @@
func.func @matmul_256x256x256_f16_f32() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%5 = tensor.empty() : tensor<256x256xf32>
@@ -498,12 +480,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matmul_256x256x256_f16_f16 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -516,9 +496,9 @@
func.func @matmul_256x256x256_f16_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<256x256xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16>
%5 = tensor.empty() : tensor<256x256xf16>
@@ -550,12 +530,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @unaligned_mk_batch_matmul_64x978x1281x1281_f16_f16 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -568,9 +546,9 @@
func.func @unaligned_nk_batch_matmul() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [64, 968, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>> -> tensor<64x968x1281xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [64, 1281, 1281], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>> -> tensor<64x1281x1281xf16>
%5 = tensor.empty() : tensor<64x968x1281xf16>
@@ -595,9 +573,9 @@
// CHECK-DAG: %[[RHS_SHARED_SUB:.+]] = memref.subview %[[RHS_SHARED]][0, 0, 0] [1, 16, 16] [1, 1, 1]
// CHECK-DAG: %[[LHS_SHARED:.+]] = memref.alloc() : memref<1x16x20xf16, #gpu.address_space<workgroup>>
// CHECK-DAG: %[[LHS_SHARED_SUB:.+]] = memref.subview %[[LHS_SHARED]][0, 0, 0] [1, 16, 16] [1, 1, 1]
-// CHECK-DAG: %[[LHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x968x1281xf16, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[RHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x1281x1281xf16, #hal.descriptor_type<storage_buffer>>
-// CHECK-DAG: %[[OUT_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : memref<64x968x1281xf16, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[LHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x968x1281xf16, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[RHS_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : memref<64x1281x1281xf16, #hal.descriptor_type<storage_buffer>>
+// CHECK-DAG: %[[OUT_GLOBAL:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : memref<64x968x1281xf16, #hal.descriptor_type<storage_buffer>>
// CHECK-DAG: %[[LHS_GLOBAL_SUB:.+]] = memref.subview %[[LHS_GLOBAL]]
// CHECK-DAG: %[[RHS_GLOBAL_SUB:.+]] = memref.subview %[[RHS_GLOBAL]]
// CHECK: %[[LHS_LOAD:.+]] = vector.transfer_read %[[LHS_GLOBAL_SUB]]{{.+}} {in_bounds = [true, false, false]}
@@ -634,11 +612,9 @@
// NOTE: This test is not exhaustive of all possible ways the above condition is breaking,
// but rather is an example of a matmul shape from a model that broke our compilation heuristic.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @contract_schedule_considering_read_layout {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -656,9 +632,9 @@
%3 = arith.index_castui %0 : i32 to index
%4 = arith.index_castui %1 : i32 to index
%5 = arith.index_castui %2 : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x160x1536xf16>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1536x1536xf16>>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<2x160x1536xf16>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x160x1536xf16>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1536x1536xf16>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<2x160x1536xf16>>
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0], sizes = [2, 160, 1536], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x160x1536xf16>> -> tensor<2x160x1536xf16>
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0], sizes = [2, 1536, 1536], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1536x1536xf16>> -> tensor<2x1536x1536xf16>
%11 = tensor.empty() : tensor<2x160x1536xf16>
@@ -690,13 +666,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @attention_20x4096x64x4096x64 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -709,10 +683,10 @@
func.func @attention_20x4096x64x4096x64() {
%cst = arith.constant 1.250000e-01 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<20x4096x64xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [20, 4096, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<20x4096x64xf16>> -> tensor<20x4096x64xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir
index 846b30e..4128e4f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_warp_reduction.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-rocdl-configuration-pipeline), iree-codegen-linalg-to-rocdl-pipeline2)))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @warp_reduction {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -17,8 +15,8 @@
func.func @warp_reduction() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x512xf32>> -> tensor<2x512xf32>
%3 = tensor.empty() : tensor<2xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32>
@@ -42,12 +40,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @main_dispatch_517 {
hal.executable.variant public @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -62,9 +58,9 @@
%c128 = arith.constant 128 : index
%c0 = arith.constant 0 : index
%c394240 = arith.constant 394240 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor<writeonly:tensor<1x1280xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor<writeonly:tensor<1x1280xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>> -> tensor<1x1280xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>> -> tensor<1280x1280xf32>
%5 = tensor.empty() : tensor<1x1280xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir
index e029174..649cbd9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention.mlir
@@ -2,21 +2,19 @@
// RUN: --iree-gpu-test-target=sm_60 | \
// RUN: FileCheck --check-prefix=CHECK %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @_attention_dispatch_0() {
%c0 = arith.constant 0 : index
%cst = arith.constant 1.250000e-01 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<192x1024x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<192x1024x64xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>> -> tensor<192x1024x64xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>> -> tensor<192x1024x64xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [192, 1024, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<192x1024x64xf16>> -> tensor<192x1024x64xf16>
@@ -52,16 +50,16 @@
// CHECK-DAG: %[[C1024:.+]] = arith.constant 1024 : index
// CHECK-DAG: %[[CST_5:.+]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[CST_6:.+]] = arith.constant dense<1.802980e-01> : vector<128x64xf16>
-// CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64)
+// CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64)
// CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D0]], 64 : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64)
+// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64)
// CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D1]], 64 : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64)
+// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64)
// CHECK-SAME: offset(%[[C0]]) flags(ReadOnly) : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D2]], 64 : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) alignment(64)
+// CHECK: %[[D3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) alignment(64)
// CHECK-SAME: offset(%[[C0]]) : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D3]], 64 : memref<192x1024x64xf16, #hal.descriptor_type<storage_buffer>>
// CHECK: %[[WORKGROUP_ID_X:.+]] = hal.interface.workgroup.id[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir
index 7e69786..13a84ec 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma.mlir
@@ -2,21 +2,19 @@
// RUN: --iree-gpu-test-target=gfx908 | \
// RUN: FileCheck --check-prefix=CHECK %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @attention_dispatch_0_attention_16x16384x128xf16() {
%c0 = arith.constant 0 : index
%scale = arith.constant 0.08838834764 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x16384x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x16384x128xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>> -> tensor<16x16384x128xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>> -> tensor<16x16384x128xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 16384, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16384x128xf16>> -> tensor<16x16384x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir
index 2782383..1a60072 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s --check-prefix=CDNA3
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dynamic_batch_matvec() {
%c32_i64 = arith.constant 32 : i64
@@ -21,11 +19,11 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<32x1x128xf16>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<32x1x128xf16>>
%11 = flow.dispatch.workload.ordinal %8, 0 : index
%12 = flow.dispatch.workload.ordinal %9, 1 : index
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11}
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12}
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11}
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12}
%15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [32, 1, %11], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11} -> tensor<32x1x?xf16>
%16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0], sizes = [32, %12, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12} -> tensor<32x?x128xf16>
%17 = tensor.empty() : tensor<32x1x128xf16>
@@ -46,12 +44,10 @@
// This test uses special heuristics that need to check the backend in the #hal.executable.target.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -60,9 +56,9 @@
func.func @vmt1() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>> -> tensor<1x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<1x32000xf16>
@@ -88,12 +84,10 @@
// This test uses special heuristics that need to check the backend in the #hal.executable.target.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -102,9 +96,9 @@
func.func @vmt2() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>> -> tensor<1x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<1x32000xf16>
@@ -128,14 +122,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -144,11 +136,11 @@
func.func @i4_dequant_matvec() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>> -> tensor<4096x32x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
@@ -187,19 +179,17 @@
// Send 2xNxK mmt to the warp reduction pipeline.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @skinny_mmt() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>> -> tensor<2x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<2x32000xf16>
@@ -220,19 +210,17 @@
// Send Mx2xK mmt to the warp reduction pipeline.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @skinny_mmt() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32000x2xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32000x2xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4096xf16>> -> tensor<2x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<32000x2xf16>
@@ -251,12 +239,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
@@ -264,9 +250,9 @@
func.func @not_vmt() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<5x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<5x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [5, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<5x4096xf16>> -> tensor<5x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<5x32000xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir
index 8db080c..e5d6b2a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_winograd.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline='builtin.module(iree-llvmgpu-select-lowering-strategy)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_filter_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>> -> tensor<3x3x64x128xf32>
%3 = tensor.empty() : tensor<8x8x64x128xf32>
%4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32>
@@ -26,16 +24,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_input_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>> -> tensor<2x34x34x128xf16>
%3 = tensor.empty() : tensor<8x8x2x6x6x128xf16>
%4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16>
@@ -52,16 +48,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_output_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>> -> tensor<8x8x2x6x6x128xf16>
%3 = tensor.empty() : tensor<2x36x36x128xf16>
%4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
index de7bbb4..d129117 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_cuda.mlir
@@ -3,12 +3,10 @@
// RUN: %s | FileCheck %s
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv2d_1x230x230x3_7x7x3x64_dispatch_0 {
hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
@@ -21,9 +19,9 @@
func.func @conv2d_1x230x230x3_7x7x3x64() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x230x230x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x230x230x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x64xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x230x230x3xf32>> -> tensor<1x230x230x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>> -> tensor<7x7x3x64xf32>
%5 = tensor.empty() : tensor<1x112x112x64xf32>
@@ -50,12 +48,10 @@
// -----
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv_nchw_dispatch_0 {
hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
@@ -68,9 +64,9 @@
func.func @conv_nchw() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x66x66xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<320x4x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x66x66xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<320x4x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x66x66xf32>> -> tensor<2x4x66x66xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<320x4x3x3xf32>> -> tensor<320x4x3x3xf32>
%5 = tensor.empty() : tensor<2x320x64x64xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir
index 6035b4b..ec67064 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir
@@ -2,13 +2,11 @@
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target,canonicalize)))))' \
// RUN: %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv_nchw_dispatch_1 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -23,10 +21,10 @@
func.func @conv_2d_nchw_fchw_2x320x64x64x320x3x3_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x320x130x130xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x320x130x130xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 320, 130, 130], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x320x130x130xf16>> -> tensor<2x320x130x130xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [320, 320, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<320x320x3x3xf16>> -> tensor<320x320x3x3xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [320], strides = [1] : !flow.dispatch.tensor<readonly:tensor<320xf16>> -> tensor<320xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
index ea5f33f..f20c2c3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_nvvm.mlir
@@ -1,14 +1,10 @@
// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-nvvm))))" --iree-gpu-test-target=sm_60 --split-input-file %s | FileCheck %s
// Test that standard and GPU ops are converted to LLVM and NVVM.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @abs_ex_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -17,9 +13,9 @@
func.func @abs_ex_dispatch_0() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<16xi32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -44,14 +40,10 @@
// CHECK: llvm.store %[[FADD]], %[[ADDR]] : f32, !llvm.ptr
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @abs_dynamic {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -66,9 +58,9 @@
%d0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%d1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
%d2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) offset(%o) : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>{%d0, %d1, %d2}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?x?xi32>{%d0, %d1, %d2}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<?x?x?xf32>{%d0, %d1, %d2}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%o) : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>{%d0, %d1, %d2}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?x?xi32>{%d0, %d1, %d2}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<?x?x?xf32>{%d0, %d1, %d2}
%9 = memref.load %0[%c3, %c5, %c7] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
%10 = memref.load %1[%c3, %c5, %c7] : memref<?x?x?xi32>
%11 = arith.sitofp %10 : i32 to f32
@@ -106,13 +98,9 @@
// Test that we handle correctly the case where bindings are sparse (set 0
// binding 0 is not used).
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @dead_symbol {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -121,8 +109,8 @@
func.func @dead_symbol() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xi32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -146,11 +134,9 @@
// A single binding may contain different data types.
// Test that we cast pointers correctly.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @mixed_type {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -159,9 +145,9 @@
func.func @mixed_type() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) : memref<16xf32, strided<[1], offset: 4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c0) : memref<16xi32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) : memref<16xf32, strided<[1], offset: 4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c0) : memref<16xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -187,10 +173,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @shared_memory_lowering {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -227,10 +211,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @shared_memory_dealloc_elision {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -253,10 +235,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @shared_memory_lowering_aligned_alloc {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -288,15 +268,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @check_not_readonly {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -305,13 +281,13 @@
func.func @check_not_readonly() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<16xi32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
- %b11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) flags(ReadOnly) : memref<16xi32>
- %b12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) offset(%c128) : memref<16xf32, strided<[1], offset: 32>>
- %b21 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) flags(ReadOnly) : memref<16xi32>
- %b22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<16xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
+ %b11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) flags(ReadOnly) : memref<16xi32>
+ %b12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) offset(%c128) : memref<16xf32, strided<[1], offset: 32>>
+ %b21 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) flags(ReadOnly) : memref<16xi32>
+ %b22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) offset(%c128) flags(ReadOnly) : memref<16xf32, strided<[1], offset: 32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -332,13 +308,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @complex {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -347,8 +319,8 @@
func.func @complex() {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c128) flags(ReadOnly) : memref<16xcomplex<f32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c128) flags(ReadOnly) : memref<16xcomplex<f32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -371,10 +343,8 @@
// -----
// Check that we don't choke on memref of index.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @shared_memory_lowering_index {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -398,11 +368,9 @@
// CHECK-NEXT: %{{.*}} = llvm.getelementptr %{{.*}} : (!llvm.ptr<3>, i64, i64) -> !llvm.ptr<3>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @masked_load_store {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -412,8 +380,8 @@
%c0 = arith.constant 0 : index
%idx = gpu.thread_id x
%pass_thru = arith.constant dense<0.000000e+00> : vector<1xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space<global>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space<global>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space<global>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space<global>>
%mask = vector.create_mask %idx : vector<1xi1>
%ld = vector.maskedload %0[%idx], %mask, %pass_thru : memref<64xf32, #gpu.address_space<global>>, vector<1xi1>, vector<1xf32> into vector<1xf32>
vector.maskedstore %1[%idx], %mask, %ld : memref<64xf32, #gpu.address_space<global>>, vector<1xi1>, vector<1xf32>
@@ -429,12 +397,10 @@
// -----
// Test workgroup size lowering
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_wg_size {
hal.executable.variant @rocm target(<"cuda", "cuda-nvptx-fb">) {
@@ -446,7 +412,7 @@
%c0 = arith.constant 0.0 : f32
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32>
memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32>
return
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
index a88ec61..3e158e1 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/convert_to_rocdl.mlir
@@ -2,15 +2,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx908 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-rocdl))))" --iree-hip-index-bits=32 %s | FileCheck %s --check-prefix=INDEX32
// Test that standard and GPU ops are converted to LLVM and NVVM.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @abs_ex_dispatch_0 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -18,9 +14,9 @@
builtin.module {
func.func @abs_ex_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) flags(ReadOnly) : memref<16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) flags(ReadOnly) : memref<16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<16xf32>
%3 = gpu.block_id x
%4 = gpu.block_dim x
%5 = gpu.thread_id x
@@ -48,14 +44,10 @@
// -----
// Test that maximum and minimum are converted to max and min on rocm
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @abs_ex_dispatch_0 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -63,9 +55,9 @@
builtin.module {
func.func @reduction_maximum() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) :
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) :
memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x64xf32,
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x64xf32,
strided<[4096, 64, 1], offset: ?>>
%2 = vector.load %0[%c0, %c0, %c0] : memref<32x64x64xf32, strided<[4096, 64, 1], offset: ?>>, vector<2xf32>
%3 = vector.reduction <maximumf>, %2 : vector<2xf32> into f32
@@ -81,10 +73,8 @@
// -----
// Test that gpu barriers are lowered to `s_waitcnt lgkmcnt(0)\0As_barrier` on rocm
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @simple_barrier {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -101,11 +91,9 @@
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier", "" : () -> ()
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @masked_load_store {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -115,8 +103,8 @@
%c0 = arith.constant 0 : index
%idx = gpu.thread_id x
%pass_thru = arith.constant dense<0.000000e+00> : vector<1xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space<global>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space<global>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<64xf32, #gpu.address_space<global>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64xf32, #gpu.address_space<global>>
%mask = vector.create_mask %idx : vector<1xi1>
%ld = vector.maskedload %0[%idx], %mask, %pass_thru : memref<64xf32, #gpu.address_space<global>>, vector<1xi1>, vector<1xf32> into vector<1xf32>
vector.maskedstore %1[%idx], %mask, %ld : memref<64xf32, #gpu.address_space<global>>, vector<1xi1>, vector<1xf32>
@@ -132,12 +120,10 @@
// -----
// Test workgroup size lowering
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_wg_size {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -149,7 +135,7 @@
%c0 = arith.constant 0.0 : f32
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32>
memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32>
return
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
index c419526..14259a9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(func.func(iree-llvmgpu-tile-and-distribute))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[2, 256, 4]]>
#map = affine_map<()[s0] -> (s0 * 2)>
@@ -16,9 +14,9 @@
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x1024xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -70,12 +68,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 8, 32, 32]]>
#map = affine_map<()[s0] -> (s0 * 8)>
@@ -90,11 +86,11 @@
%c4 = arith.constant 4 : index
%c32 = arith.constant 32 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32>
memref.assume_alignment %0, 32 : memref<4x32x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32>
memref.assume_alignment %1, 32 : memref<4x1024x64xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32>
memref.assume_alignment %2, 32 : memref<4x32x64xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -143,12 +139,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[2, 32, 4]]>
#map = affine_map<()[s0] -> (s0 * 2)>
@@ -159,9 +153,9 @@
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x1024xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
@@ -215,11 +209,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[]]>
#map = affine_map<(d0) -> (d0)>
@@ -229,8 +221,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0x7FC00000 : f32
%cst_0 = arith.constant 0xFF800000 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1000xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<f32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1000xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<f32>
linalg.fill {lowering_config = #config} ins(%cst_0 : f32) outs(%1 : memref<f32>)
linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref<f32>) attrs = {lowering_config = #config} {
^bb0(%in: f32, %out: f32):
@@ -253,12 +245,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 1, 256, 4, 4, 4]]>
#map = affine_map<()[s0] -> (s0 * 256)>
@@ -274,11 +264,11 @@
%c41664 = arith.constant 41664 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x64x56x56xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x64x56x56xf32>
memref.assume_alignment %0, 64 : memref<1x64x56x56xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c41664) : memref<64x64x1x1xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c41664) : memref<64x64x1x1xf32>
memref.assume_alignment %1, 64 : memref<64x64x1x1xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c802816) : memref<1x64x56x56xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c802816) : memref<1x64x56x56xf32>
memref.assume_alignment %2, 64 : memref<1x64x56x56xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -316,12 +306,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 2, 256, 4]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulSimt workgroup_size = [64, 8, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
@@ -342,9 +330,9 @@
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_cast %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) : memref<?x?x12x64xf32>{%1, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%1) : memref<?x?x12x64xf32>{%1, %1}
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<?x12x?x?xf32>{%1, %1, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) : memref<?x?x12x64xf32>{%1, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%1) : memref<?x?x12x64xf32>{%1, %1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<?x12x?x?xf32>{%1, %1, %1}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir
index 30e5278..7f50d1d 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/elementwise_pipeline.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d0, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @forward_dispatch_0_generic_320x320x3x3() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x320x320x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<320x320x3x3xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x320x320x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<320x320x3x3xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 320, 320, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x320x320x3xf32>> -> tensor<3x320x320x3xf32>
%3 = tensor.empty() : tensor<320x320x3x3xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<3x320x320x3xf32>) outs(%3 : tensor<320x320x3x3xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir
index 74b075c..2c171b2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_pipeline_generalize_named_ops.mlir
@@ -10,21 +10,19 @@
// CHECK-NEXT: linalg.generic
// CHECK-NOT: linalg.matmul_transpose_b
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @warp_reduction_large_vector() {
%cst = arith.constant 0.000000e+00 : f32
%c128 = arith.constant 128 : index
%c0 = arith.constant 0 : index
%c394240 = arith.constant 394240 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor<writeonly:tensor<1x1280xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c128) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c394240) : !flow.dispatch.tensor<writeonly:tensor<1x1280xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1280xf32>> -> tensor<1x1280xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1280x1280xf32>> -> tensor<1280x1280xf32>
%5 = tensor.empty() : tensor<1x1280xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
index 3c3932c..50d9895 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
@@ -5,19 +5,17 @@
// Transform dialect attributes are tested separately.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0) -> (d0)>
func.func @add_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
%3 = tensor.empty() : tensor<16384xf32>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [16384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<16384xf32>> -> tensor<16384xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [16384], strides = [1] : !flow.dispatch.tensor<readonly:tensor<16384xf32>> -> tensor<16384xf32>
@@ -39,21 +37,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dot_dispatch_1() {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c2 = arith.constant 2 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x3xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x4xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2x3xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x4xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x4xf32>
linalg.fill ins(%cst : f32) outs(%2 : memref<2x4xf32>)
linalg.matmul ins(%0, %1 : memref<2x3xf32>, memref<3x4xf32>) outs(%2 : memref<2x4xf32>)
return
@@ -70,21 +66,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unaligned_k() {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c2 = arith.constant 2 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<128x258xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<258x64xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<128x64xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<128x258xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<258x64xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<128x64xf32>
linalg.fill ins(%cst : f32) outs(%2 : memref<128x64xf32>)
linalg.matmul ins(%0, %1 : memref<128x258xf32>, memref<258x64xf32>) outs(%2 : memref<128x64xf32>)
return
@@ -101,11 +95,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> ()>
@@ -113,8 +105,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0x7FC00000 : f32
%cst_0 = arith.constant 0xFF800000 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1000xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<f32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1000xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<f32>
linalg.fill ins(%cst_0 : f32) outs(%1 : memref<f32>)
linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["reduction"]} ins(%0 : memref<1000xf32>) outs(%1 : memref<f32>) {
^bb0(%in: f32, %out: f32):
@@ -138,19 +130,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @reduction_aligned2() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x128x384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x128x384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x384xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 128, 384], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x128x384xf32>> -> tensor<4x128x384xf32>
%3 = tensor.empty() : tensor<128x384xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<128x384xf32>) -> tensor<128x384xf32>
@@ -174,19 +164,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @copy_as_generic() {
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xi32>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xi32>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xi32>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xi32>{%0, %1}
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : memref<?x?xi32>) outs(%3 : memref<?x?xi32>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
@@ -203,19 +191,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_1d_fft_stage2() {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32>
%cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%4:2 = iree_linalg_ext.fft {__internal_linalg_transform__ = "workgroup"} ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32>
@@ -233,11 +219,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_3d_fft_stage3() {
%c0 = arith.constant 0 : index
@@ -249,8 +233,8 @@
%cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32>
%0 = bufferization.to_memref %cst_0 : memref<4xf32>
%1 = bufferization.to_memref %cst : memref<4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32>
iree_linalg_ext.fft {__internal_linalg_transform__ = "workgroup"} ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>)
return
}
@@ -264,12 +248,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 128, 64]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulSimt workgroup_size = [16, 8, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
@@ -279,9 +261,9 @@
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>> -> tensor<256x1024xf32>
%5 = tensor.empty() : tensor<128x1024xf32>
@@ -302,22 +284,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @sort_op() {
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c2304000 = arith.constant 2304000 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<1x576000xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<1x576000xi32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<1x576000xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c2304000) : !flow.dispatch.tensor<writeonly:tensor<1x576000xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<1x576000xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<1x576000xi32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<1x576000xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c2304000) : !flow.dispatch.tensor<writeonly:tensor<1x576000xi32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 576000], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x576000xf32>> -> tensor<1x576000xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1, 576000], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x576000xi32>> -> tensor<1x576000xi32>
%6:2 = iree_linalg_ext.sort dimension(1) outs(%4, %5 : tensor<1x576000xf32>, tensor<1x576000xi32>) {
@@ -339,21 +319,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_config_sm35() {
%cst = arith.constant 0.000000e+00 : f32
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>> -> tensor<256x1024xf32>
%5 = tensor.empty() : tensor<128x1024xf32>
@@ -369,21 +347,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_config_sm80() {
%cst = arith.constant 0.000000e+00 : f32
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>> -> tensor<256x1024xf32>
%5 = tensor.empty() : tensor<128x1024xf32>
@@ -399,21 +375,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_config_sm86() {
%cst = arith.constant 0.000000e+00 : f32
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>> -> tensor<256x1024xf32>
%5 = tensor.empty() : tensor<128x1024xf32>
@@ -429,12 +403,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 128, 32]]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
@@ -445,9 +417,9 @@
%c40064 = arith.constant 40064 : index
%c34752 = arith.constant 34752 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x7xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c40064) : !flow.dispatch.tensor<readonly:tensor<3x64x4x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c34752) : !flow.dispatch.tensor<writeonly:tensor<3x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x7xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c40064) : !flow.dispatch.tensor<readonly:tensor<3x64x4x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c34752) : !flow.dispatch.tensor<writeonly:tensor<3x64xf32>>
%3 = tensor.empty() : tensor<3x64xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 4], sizes = [3, 64, 4, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x64x4x8xf32>> -> tensor<3x64x4xf32>
%5 = linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%3 : tensor<3x64xf32>) -> tensor<3x64xf32>
@@ -470,11 +442,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dynamic_pack_2x2() {
%c0 = arith.constant 0 : index
@@ -487,8 +457,8 @@
%5 = arith.index_castui %1 : i32 to index
%6 = arith.index_castui %2 : i32 to index
%7 = arith.index_castui %3 : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x2x2xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x2x2xi32>>{%6, %7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xi32>>{%4, %5} -> tensor<?x?xi32>
%11 = tensor.empty(%6, %7) : tensor<?x?x2x2xi32>
%pack = tensor.pack %10 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %11 : tensor<?x?xi32> -> tensor<?x?x2x2xi32>
@@ -505,21 +475,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @large_matmul_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2560x1792xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1792x2048xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2560x2048xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2560x1792xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1792x2048xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2560x2048xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 1792], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2560x1792xf16>> -> tensor<2560x1792xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1792, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1792x2048xf16>> -> tensor<1792x2048xf16>
%5 = tensor.empty() : tensor<2560x2048xf16>
@@ -539,21 +507,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @large_matmul_f32() {
%cst = arith.constant 0.000000e+00 : f32
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2560x1792xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1792x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2560x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2560x1792xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1792x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2560x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 1792], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2560x1792xf32>> -> tensor<2560x1792xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1792, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1792x2048xf32>> -> tensor<1792x2048xf32>
%5 = tensor.empty() : tensor<2560x2048xf32>
@@ -574,19 +540,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @inner_unit_dim() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384x1xf32>>
%3 = tensor.empty() : tensor<16384x1xf32>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16384, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>> -> tensor<16384x1xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16384, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16384x1xf32>> -> tensor<16384x1xf32>
@@ -608,12 +572,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d3)>
@@ -627,9 +589,9 @@
%cst_3 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_4 = arith.constant dense_resource<__elided__> : tensor<64xf32>
%cst_5 = arith.constant dense_resource<__elided__> : tensor<64xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x230x230x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c162508800) : !flow.dispatch.tensor<writeonly:tensor<256x112x112x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x230x230x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c162508800) : !flow.dispatch.tensor<writeonly:tensor<256x112x112x64xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [256, 230, 230, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<256x230x230x3xf32>> -> tensor<256x230x230x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [7, 7, 3, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<7x7x3x64xf32>> -> tensor<7x7x3x64xf32>
%5 = tensor.empty() : tensor<256x112x112x64xf32>
@@ -660,11 +622,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d1, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d1, d4)>
@@ -679,9 +639,9 @@
%3 = arith.index_castui %0 {stream.alignment = 64 : index, stream.values = [35524672 : index, 240930880 : index, 446337088 : index, 651743296 : index]} : i32 to index
%4 = arith.index_castui %1 {stream.alignment = 64 : index, stream.values = [57544768 : index, 262950976 : index, 468357184 : index, 673763392 : index]} : i32 to index
%5 = arith.index_castui %2 {stream.alignment = 64 : index, stream.values = [1728 : index, 36472832 : index, 72943744 : index, 109415936 : index]} : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<512x4x42x42xf32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<512x4x42x42xf32>>
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0, 0, 0], sizes = [512, 42, 4, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>> -> tensor<512x42x4x64xf32>
%10 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0, 0], sizes = [512, 42, 4, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<512x42x4x64xf32>> -> tensor<512x42x4x64xf32>
%11 = tensor.empty() : tensor<512x4x42x42xf32>
@@ -712,13 +672,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d0)>
@@ -755,12 +713,12 @@
%24 = arith.shli %23, %c32_i64 : i64
%25 = arith.ori %22, %24 : i64
%26 = arith.index_castui %25 : i64 to index
- %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x11008xi4>>
- %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf32>>
- %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf32>>
+ %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x11008xi4>>
+ %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf32>>
+ %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf32>>
%30 = flow.dispatch.workload.ordinal %26, 0 : index
- %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x11008xf32>>{%30}
- %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
+ %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x11008xf32>>{%30}
+ %32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
%33 = flow.dispatch.tensor.load %27, offsets = [0, 0], sizes = [4096, 11008], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x11008xi4>> -> tensor<4096x11008xi4>
%34 = flow.dispatch.tensor.load %28, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4096xf32>> -> tensor<4096xf32>
%35 = flow.dispatch.tensor.load %29, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4096xf32>> -> tensor<4096xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
index 7313f52..436ef52 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/illegal_configuration.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" --verify-diagnostics --split-input-file %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = []>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulSimt workgroup_size = [32, 8, 8], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{Total number of threads in a thread block 2048 exceeds the limit of 1024 with compilation pipeline LLVMGPUMatmulSimt}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -21,20 +19,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = []>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulSimt workgroup_size = [32, 8, 2], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{Expected workgroup size in z-dim = 1, but got 2 with compilation pipeline LLVMGPUMatmulSimt}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -42,20 +38,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [64, 2, 10], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32>
// expected-error @+1 {{Total number of threads in a thread block 1280 exceeds the limit of 1024 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>)
return
@@ -63,20 +57,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [48, 2, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32>
// expected-error @+1 {{Number of threads in x-dim 48 is not a multiple of warp size (32) or integer units of warps in x-dim with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>)
return
@@ -84,20 +76,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [64, 2, 2], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32>
// expected-error @+1 {{Expected workgroup size in z-dim = 1, but got 2 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>)
return
@@ -105,20 +95,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 20]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [64, 2, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x32xf32>
// expected-error @+1 {{Thread block shape 32, 32, 20 cannot be tiled on matmul shape 32, 32, 16 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x32xf32>) outs(%2 : memref<32x32xf32>)
return
@@ -126,20 +114,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [128, 1, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32>
// expected-error @+1 {{Tensor Core instruction shape 16, 16, 8 cannot be tiled on warp shape 64, 8, 16 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>)
return
@@ -147,20 +133,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [64, 2, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<48x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<48x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<48x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<48x32xf32>
// expected-error @+1 {{Thread block shape 32, 32, 16 cannot be tiled on matmul shape 48, 32, 16 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<48x16xf32>, memref<16x32xf32>) outs(%2 : memref<48x32xf32>)
return
@@ -168,20 +152,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 16]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCore workgroup_size = [64, 2, 1], {pipeline_depth = 0 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<32x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x48xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<32x48xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<32x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x48xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<32x48xf32>
// expected-error @+1 {{Thread block shape 32, 32, 16 cannot be tiled on matmul shape 32, 48, 16 with compilation pipeline LLVMGPUMatmulTensorCore}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<32x16xf32>, memref<16x48xf32>) outs(%2 : memref<32x48xf32>)
return
@@ -189,12 +171,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[2, 32, 32, 16]]>
#map = affine_map<()[s0] -> (s0 * 8)>
@@ -209,11 +189,11 @@
%c4 = arith.constant 4 : index
%c32 = arith.constant 32 : index
%c64 = arith.constant 64 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : memref<4x32x1024xf32>
memref.assume_alignment %0, 32 : memref<4x32x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : memref<4x1024x64xf32>
memref.assume_alignment %1, 32 : memref<4x1024x64xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : memref<4x32x64xf32>
memref.assume_alignment %2, 32 : memref<4x32x64xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -242,20 +222,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 32, 48]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCoreMmaSync workgroup_size = [128, 1, 1], {pipeline_depth = 4 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32>
// expected-error @+1 {{Thread block shape 64, 32, 48 cannot be tiled on matmul shape 1024, 256, 512 with compilation pipeline LLVMGPUMatmulTensorCoreMmaSync}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>)
return
@@ -263,20 +241,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 32, 4]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCoreMmaSync workgroup_size = [128, 1, 1], {pipeline_depth = 4 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xf32>
// expected-error @+1 {{Tensor Core instruction shape 16, 8, 8 cannot be tiled on warp shape 64, 8, 4 with compilation pipeline LLVMGPUMatmulTensorCoreMmaSync}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xf32>, memref<512x256xf32>) outs(%2 : memref<1024x256xf32>)
return
@@ -284,20 +260,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 64]]>
#translation = #iree_codegen.translation_info<LLVMGPUMatmulTensorCoreMmaSync workgroup_size = [128, 1, 1], {pipeline_depth = 4 : i64, store_stage = 1 : i64}>
func.func @illegal() attributes {translation_info = #translation} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1024x512xi8>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<512x256xi8>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1024x256xi8>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1024x512xi8>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<512x256xi8>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1024x256xi8>
// expected-error @+1 {{Expected f16, bf16 or f32 for Tensor Core (MMA.SYNC) pipeline}}
linalg.matmul {lowering_config = #config} ins(%0, %1 : memref<1024x512xi8>, memref<512x256xi8>) outs(%2 : memref<1024x256xi8>)
return
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
index 18100b1..f292df7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/linalg_transform.mlir
@@ -10,19 +10,17 @@
// RUN: --iree-codegen-transform-dialect-library=%p/transform_dialect_codegen_foreach_to_gpu_spec.mlir@__transform_main | \
// RUN: FileCheck %s --check-prefix=FOREACH-TO-GPU
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
func.func @matmul_static_dispatch_0() attributes {hal.executable.target = #executable_target_cuda_nvptx_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<250x500xf32>> -> tensor<250x500xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>> -> tensor<500x1020xf32>
%5 = tensor.empty() : tensor<250x1020xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir
index 1082caf..73bdb91 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir
@@ -1,17 +1,15 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-llvmgpu-bufferization-pipeline))" --split-input-file %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @bufferize_with_thread_private_memory(%arg0: index) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
%cst_ved = arith.constant dense<0.000000e+00> : vector<1x1x4x4xf16>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<320xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf16>>
%2 = flow.dispatch.tensor.load %1, offsets = [%arg0, %arg0, %arg0, %arg0], sizes = [1, 1, 8, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<writeonly:tensor<2x320x64x64xf16>> -> tensor<1x1x8x64xf16>
%3 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<320xf16>> -> tensor<1xf16>
%4 = scf.forall (%arg1, %arg2) in (2, 16) shared_outs(%arg3 = %2) -> (tensor<1x1x8x64xf16>) {
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir
index 8ce12fb..25b5957 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_extract_address_computation.mlir
@@ -73,7 +73,11 @@
// Just double check that we captured the IV
// CHECK: %[[IV_NEXT:.*]] = llvm.mul %[[IV]], %[[C8192]] : i64
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @matmul_dispatch_0 {
hal.executable.variant public @cuda_nvptx_fb target(#executable_target_cuda_nvptx_fb) {
hal.executable.export public @matmul_dispatch_0_matmul_2560x2560x2560 ordinal(0) layout(#pipeline_layout) {
@@ -85,9 +89,9 @@
func.func @matmul_dispatch_0_matmul_2560x2560x2560() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2560x2560xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2560x2560xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2560, 2560], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>> -> tensor<2560x2560xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2560, 2560], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2560x2560xf16>> -> tensor<2560x2560xf16>
%5 = tensor.empty() : tensor<2560x2560xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
index ab7136c..28ae306 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_mma_sync_pipeline_test.mlir
@@ -5,12 +5,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @mma_fused_fp16 {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -25,10 +23,10 @@
%cst = arith.constant 0.000000e+00 : f16
%c2048 = arith.constant 2048 : index
%c512 = arith.constant 512 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf16>>
- %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf16>>
+ %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>> -> tensor<2048x1024xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1]
@@ -87,12 +85,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @mma_fused_f32 {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -107,10 +103,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c2048 = arith.constant 2048 : index
%c512 = arith.constant 512 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
- %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
+ %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>> -> tensor<2048x1024xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
index 975f73d..c591885 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
@@ -4,12 +4,10 @@
// Verify that a simple element-wise op gets lowered successfully all the way to
// nvvm/llvm dialect.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @simpleMath_ex_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -21,9 +19,9 @@
builtin.module {
func.func @add_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
%3 = tensor.empty() : tensor<16xf32>
%4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor<readonly:tensor<16xf32>> -> tensor<16xf32>
%5 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor<readonly:tensor<16xf32>> -> tensor<16xf32>
@@ -48,12 +46,10 @@
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0)[s0] -> (s0, -d0 + 1024)>
#map2 = affine_map<(d0)[s0] -> (-d0 + 1024, s0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @dot_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -68,9 +64,9 @@
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
@@ -119,12 +115,10 @@
],
iterator_types = ["parallel", "parallel", "reduction"]
}
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @dot_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -139,9 +133,9 @@
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
@@ -172,12 +166,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @conv2d_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -193,9 +185,9 @@
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x4x4x2xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x2x2x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x2x3x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x4x4x2xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x2x2x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x2x3x1xf32>>
%11 = flow.dispatch.tensor.load %0, offsets = [0, 0 ,0, 0], sizes = [1, 4, 4, 2], strides = [1, 1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<1x4x4x2xf32>> -> tensor<1x4x4x2xf32>
%13 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 2, 2, 1], strides = [1, 1, 1, 1]
@@ -221,11 +213,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @simpleMath_ex_dispatch_0 {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -237,8 +227,8 @@
builtin.module {
func.func @add_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
%3 = tensor.empty() : tensor<16xf32>
%4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor<readonly:tensor<16xf32>> -> tensor<16xf32>
%5 = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32>
@@ -261,11 +251,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @reduction_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -279,8 +267,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%c96 = arith.constant 96 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<14x14x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<14x14x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<96xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [14, 14, 96], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<14x14x96xf32>> -> tensor<14x14x96xf32>
%8 = tensor.empty() : tensor<96xf32>
@@ -307,12 +295,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @vector_add_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -325,9 +311,9 @@
func.func @vector_add_dispatch() {
%c0 = arith.constant 0 : index
%c16384 = arith.constant 16384 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
%6 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [16384], strides = [1]
: !flow.dispatch.tensor<readonly:tensor<16384xf32>> -> tensor<16384xf32>
%8 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [16384], strides = [1]
@@ -361,11 +347,9 @@
#map2 = affine_map<(d0)[s0] -> (-d0 + 16384, s0)>
#map3 = affine_map<(d0, d1) -> (d1, d0)>
#map4 = affine_map<(d0, d1) -> (d0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @vector_reduction_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -379,8 +363,8 @@
%c0 = arith.constant 0 : index
%c16384 = arith.constant 16384 : index
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x16384xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x16384xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<16384xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 16384], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<512x16384xf32>> -> tensor<512x16384xf32>
%8 = tensor.empty() : tensor<16384xf32>
@@ -406,16 +390,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @mma_fused {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
+ hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
hal.return %x, %y, %z : index, index, index
@@ -426,10 +412,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c2048 = arith.constant 2048 : index
%c512 = arith.constant 512 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
- %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
+ %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<2048x1024xf32>> -> tensor<2048x1024xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1]
@@ -489,16 +475,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @mma_fused_fp16 {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {
+ hal.executable.export public @_large_aligned_dispatch_0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2 : index):
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
hal.return %x, %y, %z : index, index, index
@@ -509,10 +497,10 @@
%cst = arith.constant 0.000000e+00 : f16
%c2048 = arith.constant 2048 : index
%c512 = arith.constant 512 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf16>>
- %di = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf16>>
+ %di = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<2048x512xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<2048x1024xf16>> -> tensor<2048x1024xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1]
@@ -568,12 +556,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
@@ -597,11 +583,11 @@
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0)
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0)
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0)
: !flow.dispatch.tensor<readonly:tensor<4x1024x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0)
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0)
: !flow.dispatch.tensor<writeonly:tensor<4x32x64xf32>>
%11 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 32, 1024], strides = [1, 1, 1]
: !flow.dispatch.tensor<readonly:tensor<4x32x1024xf32>> -> tensor<4x32x1024xf32>
@@ -648,12 +634,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
#map0 = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
@@ -674,9 +658,9 @@
func.func @split_k_gemm() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x4x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x256x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x4x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x256x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x2048x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2048, 4, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x4x256xf32>> -> tensor<2048x4x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 256, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x256x512xf32>> -> tensor<4x256x512xf32>
%5 = tensor.empty() : tensor<4x2048x512xf32>
@@ -718,12 +702,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable public @pooling_dynamic {
@@ -740,8 +722,8 @@
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%s = arith.index_cast %0 : i32 to index
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%s) : !flow.dispatch.tensor<readonly:tensor<?x2048x?x?xf32>>{%s, %s, %s}
- %15 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%s) : !flow.dispatch.tensor<writeonly:tensor<?x2048x1x1xf32>>{%s}
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%s) : !flow.dispatch.tensor<readonly:tensor<?x2048x?x?xf32>>{%s, %s, %s}
+ %15 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%s) : !flow.dispatch.tensor<writeonly:tensor<?x2048x1x1xf32>>{%s}
%16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0, 0], sizes = [%s, 2048, %s, %s], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x2048x?x?xf32>>{%s, %s, %s} -> tensor<?x2048x?x?xf32>
%19 = tensor.empty(%s) : tensor<?x2048x1x1xf32>
%38 = tensor.empty(%s, %s) : tensor<?x?xf32>
@@ -765,11 +747,9 @@
#map2 = affine_map<(d0)[s0] -> (-d0 + 16384, s0)>
#map3 = affine_map<(d0, d1) -> (d0, d1)>
#map4 = affine_map<(d0, d1) -> (d0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @warp_reduction_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -783,8 +763,8 @@
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%8 = tensor.empty() : tensor<512xf32>
@@ -818,11 +798,9 @@
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map3 = affine_map<(d0, d1) -> (d0, d1)>
#map4 = affine_map<(d0, d1) -> (d0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @warp_reduction_broadcast_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -837,8 +815,8 @@
%c1024 = arith.constant 1024 : index
%cst_0 = arith.constant 3.840000e+02 : f32
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x1024xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<512x1024xf32>> -> tensor<512x1024xf32>
%8 = tensor.empty() : tensor<512xf32>
@@ -879,15 +857,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @shared_mem_alloc {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @shared_mem_alloc ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>]>]>) {
+ hal.executable.export public @shared_mem_alloc ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2, %arg3, %arg4, %arg5
hal.return %x, %y, %z : index, index, index
@@ -896,8 +872,8 @@
func.func @shared_mem_alloc() {
%c0 = arith.constant 0 : index
%cst = arith.constant dense<0xFF800000> : tensor<14x14x480xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<29x29x480xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<14x14x480xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<29x29x480xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<14x14x480xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [29, 29, 480], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<29x29x480xf32>> -> tensor<29x29x480xf32>
%3 = tensor.empty() : tensor<3x3xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0 * 2 + d3, d1 * 2 + d4, d2)>, affine_map<(d0, d1, d2, d3, d4) -> (d3, d4)>, affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%2, %3 : tensor<29x29x480xf32>, tensor<3x3xf32>) outs(%cst : tensor<14x14x480xf32>) {
@@ -928,11 +904,9 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[32,32]]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map0 = affine_map<(d0, d1) -> (d1, d0)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -946,8 +920,8 @@
builtin.module {
func.func @shared_mem_transpose() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 768], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>> -> tensor<2048x768xf32>
%3 = tensor.empty() : tensor<768x2048xf32>
%4 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<2048x768xf32>) outs(%3 : tensor<768x2048xf32>) {
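Every test update in these files follows the same mechanical pattern. As an illustrative sketch (not part of the patch itself; `%c0` and the tensor type are placeholders), a layout written against the old nested descriptor-set syntax and its flattened equivalent look like this:

```mlir
// Before: explicit descriptor set with per-binding ordinals.
#old_layout = #hal.pipeline.layout<push_constants = 0, sets = [
  #hal.descriptor_set.layout<0, bindings = [
    #hal.descriptor_set.binding<0, storage_buffer>,
    #hal.descriptor_set.binding<1, storage_buffer>
  ]>
]>
// After: bindings listed directly on the pipeline layout; ordinals are implicit
// from list position.
#new_layout = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer>,
  #hal.pipeline.binding<storage_buffer>
]>
// Subspan ops drop the now-redundant set(0) and keep only the binding ordinal.
%c0 = arith.constant 0 : index
%old = hal.interface.binding.subspan layout(#old_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
%new = hal.interface.binding.subspan layout(#new_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
```
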
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir
index bb741ac..bb7722c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_pack() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x16x16x32xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x16x16x32xi32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xi32>> -> tensor<128x256xi32>
%3 = tensor.empty() : tensor<4x16x16x32xi32>
%pack = tensor.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir
index 45eb7ad..e2ef4ee 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/promote_matmul_to_fit_mma.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-promote-matmul-to-fit-mma{target-dimensions=parallel}))" %s | FileCheck %s --check-prefixes=ALL,PARALLEL
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-promote-matmul-to-fit-mma{target-dimensions=reduction}))" %s | FileCheck %s --check-prefixes=ALL,REDUCTION
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<()[s0] -> (s0 * 128)>
@@ -17,9 +15,9 @@
func.func @batch_matmul_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply #map()[%workgroup_id_y]
@@ -36,9 +34,9 @@
return
}
// ALL-LABEL: func.func @batch_matmul_f16
-// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
-// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
-// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
// ALL-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_HANDLE]]
// ALL-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_HANDLE]]
// PARALLEL: %[[PADDED_LHS:.+]] = tensor.pad %[[LHS]]
@@ -67,12 +65,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 64)>
#map1 = affine_map<()[s0] -> (s0 * 128)>
@@ -88,9 +84,9 @@
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
%workgroup_id_z = hal.interface.workgroup.id[2] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply #map()[%workgroup_id_y]
@@ -128,9 +124,9 @@
// The padding on parallel dims is a nop because they are already padded. Skip
// the check for the testcase.
// ALL-LABEL: func.func @batch_matmul_pad_reduction_after_tiling
-// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
-// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
-// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
+// ALL: %[[LHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x968x1281xf16>>
+// ALL: %[[RHS_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64x1281x1281xf16>>
+// ALL: %[[OUT_HANDLE:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x968x1281xf16>>
// ALL-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_HANDLE]]
// ALL-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_HANDLE]]
// REDUCTION: %[[INIT:.+]] = tensor.empty() : tensor<1x64x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
index bfc69ed..cbab841 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @warp_reduction_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -18,8 +16,8 @@
%c0 = arith.constant 0 : index
%c10240 = arith.constant 10240 : index
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 10240], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<512x10240xf32>> -> tensor<512x10240xf32>
%8 = tensor.empty() : tensor<512xf32>
@@ -103,11 +101,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @warp_reduction_broadcast_dispatch {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -122,8 +118,8 @@
%c10240 = arith.constant 10240 : index
%cst_0 = arith.constant 3.840000e+02 : f32
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x10240xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512x10240xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<512x10240xf32>> -> tensor<512x10240xf32>
%8 = tensor.empty() : tensor<512xf32>
@@ -196,11 +192,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @softmax {
hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
@@ -215,8 +209,8 @@
%cst = arith.constant -3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>> -> tensor<12x128x40960xf32>
%3 = tensor.empty() : tensor<12x128x40960xf32>
%4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir
index b890571..c46f738 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_rocm.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx940 --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s --check-prefix=CDNA3
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @softmax() {
%c0 = arith.constant 0 : index
%cst = arith.constant -3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>> -> tensor<12x128x40960xf32>
%3 = tensor.empty() : tensor<12x128x40960xf32>
%4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32>
@@ -28,19 +26,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @softmax() {
%c0 = arith.constant 0 : index
%cst = arith.constant -3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>> -> tensor<12x128x40960xf32>
%3 = tensor.empty() : tensor<12x128x40960xf32>
%4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32>
@@ -57,11 +53,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dynamic_softmax() {
%c32_i64 = arith.constant 32 : i64
@@ -74,8 +68,8 @@
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
%7 = flow.dispatch.workload.ordinal %6, 0 : index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%7}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?xf16>>{%7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%7}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?xf16>>{%7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [32, %7], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%7} -> tensor<32x?xf16>
%11 = tensor.empty(%7) : tensor<32x?xf16>
%12 = linalg.softmax dimension(1) ins(%10 : tensor<32x?xf16>) outs(%11 : tensor<32x?xf16>) -> tensor<32x?xf16>
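Layouts that previously carried `push_constants = N` keep the same count under the new `constants = N` key, and `hal.interface.constant.load` is unchanged apart from referencing the flattened layout. A minimal sketch of that shape, mirroring the `dynamic_softmax` and `pooling_dynamic` tests above (the function name and shapes here are hypothetical):

```mlir
#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
  #hal.pipeline.binding<storage_buffer>,
  #hal.pipeline.binding<storage_buffer>
]>
func.func @constant_load_example() {
  %c0 = arith.constant 0 : index
  // Constants are still addressed by ordinal; only the layout syntax changed.
  %dim_i32 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
  %dim = arith.index_cast %dim_i32 : i32 to index
  %in = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%dim}
  %out = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?xf16>>{%dim}
  return
}
```
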
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
index 3e22bd8..e3b16eb 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_cuda.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @small_reduction {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -17,8 +15,8 @@
func.func @small_reduction() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x13xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1024x13xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1024xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 13], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x13xf32>> -> tensor<1024x13xf32>
%3 = tensor.empty() : tensor<1024xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<1024xf32>) -> tensor<1024xf32>
@@ -52,11 +50,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -69,8 +65,8 @@
func.func @group_reduction() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x64xf32>> -> tensor<8x64xf32>
%3 = tensor.empty() : tensor<8xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<8xf32>) -> tensor<8xf32>
@@ -121,11 +117,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_elementwise_reduction_elementwise {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -138,8 +132,8 @@
func.func @group_elementwise_reduction_elementwise() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x64xf32>> -> tensor<8x64xf32>
%3 = tensor.empty() : tensor<8xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<8xf32>) -> tensor<8xf32>
@@ -198,11 +192,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction_larger {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -215,8 +207,8 @@
func.func @group_reduction_larger() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<33xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<33x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<33xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [33, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<33x1024xf32>> -> tensor<33x1024xf32>
%3 = tensor.empty() : tensor<33xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<33xf32>) -> tensor<33xf32>
@@ -268,11 +260,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction_1d {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -285,8 +275,8 @@
func.func @group_reduction_1d() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor<readonly:tensor<64xf32>> -> tensor<64xf32>
%3 = tensor.empty() : tensor<f32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<f32>) -> tensor<f32>
@@ -307,11 +297,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_elementwise_reduction_elementwise_4d {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -324,8 +312,8 @@
func.func @group_elementwise_reduction_elementwise_4d() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x8x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x4x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x4x8x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x4x8xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 4, 8, 64], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x8x64xf32>> -> tensor<2x4x8x64xf32>
%3 = tensor.empty() : tensor<2x4x8xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
@@ -355,11 +343,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction_i8_12345 {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -372,8 +358,8 @@
func.func @group_reduction_i8_12345() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0 : i8
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x12345xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x12345xi8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x12345xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x12345xi8>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 12345], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x12345xi8>> -> tensor<8x12345xi8>
%3 = tensor.empty() : tensor<8x12345xi8>
%4 = tensor.empty() : tensor<8xi8>
@@ -440,11 +426,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -460,8 +444,8 @@
func.func @reduction_2d_trailing_elementwise_static_dispatch_0_generic_128x10_f32() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x10xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x10xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 10], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x10xf32>> -> tensor<128x10xf32>
%3 = tensor.empty() : tensor<128x10xf32>
%4 = tensor.empty() : tensor<128xf32>
@@ -509,14 +493,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @i4_dequant_matvec {
hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb">) {
@@ -529,11 +511,11 @@
func.func @i4_dequant_matvec() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>> -> tensor<4096x32x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir
index cfa16f7..fea7846 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_transform_rocm.mlir
@@ -5,11 +5,9 @@
// RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target)))))" \
// RUN: %s | FileCheck %s --check-prefix=CDNA3
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction_1d {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -22,8 +20,8 @@
func.func @group_reduction_1d() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor<readonly:tensor<64xf32>> -> tensor<64xf32>
%3 = tensor.empty() : tensor<f32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<f32>) -> tensor<f32>
@@ -46,11 +44,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @group_reduction_1d {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -63,8 +59,8 @@
func.func @group_reduction_1d() {
%c0 = arith.constant 0 : index
%cst = arith.constant -0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<f32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [64], strides = [1] : !flow.dispatch.tensor<readonly:tensor<64xf32>> -> tensor<64xf32>
%3 = tensor.empty() : tensor<f32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<f32>) -> tensor<f32>
@@ -88,14 +84,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @i4_dequant_matvec {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -108,11 +102,11 @@
func.func @i4_dequant_matvec() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>> -> tensor<4096x32x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
@@ -165,14 +159,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @i4_dequant_matvec {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -185,11 +177,11 @@
func.func @i4_dequant_matvec() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>> -> tensor<4096x32x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32xf16>> -> tensor<4096x32xf16>
@@ -224,12 +216,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matvec_fp16 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -242,9 +232,9 @@
func.func @matvec_fp16() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>> -> tensor<1x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<1x32000xf16>
@@ -287,12 +277,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matvec_fp16 {
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -305,9 +293,9 @@
func.func @matvec_fp16() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x32000xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096xf16>> -> tensor<1x4096xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32000, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32000x4096xf16>> -> tensor<32000x4096xf16>
%5 = tensor.empty() : tensor<1x32000xf16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
index 6a060af..7736a40 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/rocdl_pipeline_test.mlir
@@ -5,12 +5,10 @@
// Verify that a simple element-wise op gets lowered successfully all the way to
// nvvm/llvm dialect.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @simpleMath_ex_dispatch_0 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -22,9 +20,9 @@
builtin.module {
func.func @add_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
%3 = tensor.empty() : tensor<16xf32>
%4 = flow.dispatch.tensor.load %0, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor<readonly:tensor<16xf32>> -> tensor<16xf32>
%5 = flow.dispatch.tensor.load %1, offsets=[0], sizes=[16], strides=[1] : !flow.dispatch.tensor<readonly:tensor<16xf32>> -> tensor<16xf32>
@@ -49,12 +47,10 @@
#map0 = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0)[s0] -> (s0, -d0 + 1024)>
#map2 = affine_map<(d0)[s0] -> (-d0 + 1024, s0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @dot_dispatch_0 {
hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
@@ -69,9 +65,9 @@
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
: !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>> -> tensor<1024x1024xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 1024], strides = [1, 1]
@@ -106,12 +102,10 @@
// -----
#map = affine_map<(d0) -> (d0)>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @ext_fp8_dispatch {
hal.executable.variant @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb">) {
@@ -123,9 +117,9 @@
builtin.module {
func.func @ext_fp8_dispatch() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf8E4M3FNUZ>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf8E5M2FNUZ>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf8E4M3FNUZ>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096xf8E5M2FNUZ>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4096xf8E4M3FNUZ>> -> tensor<4096xf8E4M3FNUZ>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4096], strides = [1] : !flow.dispatch.tensor<readonly:tensor<4096xf8E5M2FNUZ>> -> tensor<4096xf8E5M2FNUZ>
%5 = tensor.empty() : tensor<4096xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
index 186fcf5..f1ced7b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_batch_matmul.mlir
@@ -14,12 +14,10 @@
// RUN: -td-matmul-strategy-use-fma=true \
// RUN: | FileCheck %s --check-prefixes=CHECK,OPTIONS
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -27,9 +25,9 @@
func.func @batch_matmul_dispatch_0_generic_128x80x320x32_f32() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x320xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x80x320xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x80x32xf32>> -> tensor<128x80x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x32x320xf32>> -> tensor<128x32x320xf32>
%5 = tensor.empty() : tensor<128x80x320xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir
index 4c0ecc9..445a64c 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_convolution.mlir
@@ -1,19 +1,17 @@
// RUN: iree-opt %s --split-input-file --iree-codegen-llvmgpu-enable-transform-dialect-jit= --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" \
// RUN: --iree-gpu-test-target=sm_80 --iree-codegen-llvmgpu-enable-transform-dialect-implicit-gemm-strategy | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @nchw_convolution() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x128x258x258xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x128x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x128x258x258xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x128x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 128, 258, 258], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x128x258x258xf32>> -> tensor<8x128x258x258xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [256, 128, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<256x128x3x3xf32>> -> tensor<256x128x3x3xf32>
%5 = tensor.empty() : tensor<8x256x256x256xf32>
@@ -67,19 +65,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @nhwc_convolution() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x258x258x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x128x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x258x258x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x128x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x256xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 258, 258, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x258x258x128xf32>> -> tensor<8x258x258x128xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 128, 256], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x128x256xf32>> -> tensor<3x3x128x256xf32>
%5 = tensor.empty() : tensor<8x256x256x256xf32>
@@ -107,19 +103,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unaligned_convolution() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x258x258x132xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x132x264xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x264xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x258x258x132xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x132x264xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x256x256x264xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [8, 258, 258, 132], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x258x258x132xf32>> -> tensor<8x258x258x132xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 132, 264], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x132x264xf32>> -> tensor<3x3x132x264xf32>
%5 = tensor.empty() : tensor<8x256x256x264xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir
index f1eecaf..2e41bfe 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_matmul.mlir
@@ -43,19 +43,17 @@
// RUN: iree-opt %s --split-input-file --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy)" \
// RUN: --iree-gpu-test-target=sm_80 --iree-codegen-llvmgpu-enable-transform-dialect-small-matmul | FileCheck --check-prefix=SMALL %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>> -> tensor<2052x2556xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>> -> tensor<2556x2052xf32>
%5 = tensor.empty() : tensor<2052x2052xf32>
@@ -205,19 +203,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_2() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2051x2555xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2555x2050xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2051x2050xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2051x2555xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2555x2050xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2051x2050xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2051, 2555], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2051x2555xf32>> -> tensor<2051x2555xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2555, 2051], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2555x2050xf32>> -> tensor<2555x2050xf32>
%5 = tensor.empty() : tensor<2051x2050xf32>
@@ -255,19 +251,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_3() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2556xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2556xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x2556xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2556xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2556xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x2556xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x2556xf32>> -> tensor<2048x2556xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2556x2556xf32>> -> tensor<2556x2556xf32>
%5 = tensor.empty() : tensor<2048x2556xf32>
@@ -287,19 +281,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_4_partially_unaligned() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2044xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2044xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x2044xf32>> -> tensor<2048x2044xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>> -> tensor<2044x1024xf32>
%5 = tensor.empty() : tensor<2048x1024xf32>
@@ -355,19 +347,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @aligned_matmul() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>> -> tensor<2048x2048xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2048, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x2048xf32>> -> tensor<2048x2048xf32>
%5 = tensor.empty() : tensor<2048x2048xf32>
@@ -422,19 +412,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_5_small() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x2044xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x2044xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 2044], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2044xf32>> -> tensor<2x2044xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2044, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2044x1024xf32>> -> tensor<2044x1024xf32>
%5 = tensor.empty() : tensor<2x1024xf32>
@@ -461,19 +449,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @f16_matmul() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2052x2556xf16>> -> tensor<2052x2556xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2556x2052xf16>> -> tensor<2556x2052xf16>
%5 = tensor.empty() : tensor<2052x2052xf16>
@@ -494,19 +480,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @int8_matmul() {
%c0 = arith.constant 0 : index
%c0_i8 = arith.constant 0 : i8
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4x2556xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x2052xi8>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4x2556xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4x2052xi8>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x2556xi8>> -> tensor<4x2556xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2556x2052xi8>> -> tensor<2556x2052xi8>
%5 = tensor.empty() : tensor<4x2052xi8>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir
index 76fb368..599ea92 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy_pad.mlir
@@ -16,18 +16,16 @@
// RUN: --td-pad-strategy-use-async-copies=false \
// RUN: | FileCheck --check-prefix=WITH_OPTIONS %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pad() {
%c0 = arith.constant 0 : index
%c56 = arith.constant 56 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<123x456xf32>> -> tensor<123x456xf32>
%cst_0 = arith.constant 0.000000e+00 : f32
%padded = tensor.pad %2 low[%c0, 0] high[5, %c56] {
@@ -98,17 +96,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pad_low() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<123x456xf32>> -> tensor<123x456xf32>
%cst_0 = arith.constant 0.000000e+00 : f32
%padded = tensor.pad %2 low[5, 0] high[0, 56] {
@@ -127,17 +123,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pad_local() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<123x456xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [123, 456], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<123x456xf32>> -> tensor<123x456xf32>
%padded = tensor.pad %2 low[0, 0] high[5, 56] {
^bb0(%arg0: index, %arg1: index):
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir
index 01904e1..48fc842 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir
@@ -1,17 +1,15 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-llvmgpu-tensor-pad),fold-memref-alias-ops,canonicalize,cse)" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transpose_no_align_dispatch_0_generic_48x32() {
%c48 = arith.constant 48 : index
%c32 = arith.constant 32 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x48xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<48x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x48xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<48x32xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -40,8 +38,8 @@
// CHECK: %[[C48:.*]] = arith.constant 48 : index
// CHECK: %[[C32:.*]] = arith.constant 32 : index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[D0:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor<readonly:tensor<32x48xf32>>
-// CHECK: %[[D1:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor<writeonly:tensor<48x32xf32>>
+// CHECK: %[[D0:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor<readonly:tensor<32x48xf32>>
+// CHECK: %[[D1:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : !flow.dispatch.tensor<writeonly:tensor<48x32xf32>>
// CHECK: %[[WORKGROUP_ID_X:.*]] = hal.interface.workgroup.id[0] : index
// CHECK: %[[WORKGROUP_COUNT_X:.*]] = hal.interface.workgroup.count[0] : index
// CHECK: %[[WORKGROUP_ID_Y:.*]] = hal.interface.workgroup.id[1] : index
@@ -73,11 +71,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (s0 * 16)>
#map1 = affine_map<(d0)[s0] -> (-d0 + s0, 16)>
@@ -94,8 +90,8 @@
%5 = arith.index_castui %1 : i32 to index
%6 = arith.index_castui %2 : i32 to index
%7 = arith.index_castui %3 : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x2x2xi32>>{%4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c64) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x2x2xi32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -125,7 +121,7 @@
return
}
// CHECK-LABEL: func.func @unpack_dynamic
-// CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[DEST_BUF]]
// CHECK: %[[PAD:.+]] = tensor.pad %[[LOAD]]
// CHECK: %[[UNPACK:.+]] = tensor.unpack {{.+}} into %[[PAD]]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
index edc882b..ba696c7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensorcore_vectorization.mlir
@@ -1,20 +1,18 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-llvmgpu-tensorcore-vectorization))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dot() {
%c16 = arith.constant 16 : index
%c1024 = arith.constant 1024 : index
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1024x512xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2048x512xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1024x512xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2048x512xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_y]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
index 25e19a8..5354ca0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
@@ -1,17 +1,15 @@
// RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @pad_matmul_static_dispatch_0() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<250x1020xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<250x500xf32>> -> tensor<250x500xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>> -> tensor<500x1020xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
index 024c901..4b4a465 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_promote_operands.mlir
@@ -1,17 +1,15 @@
// RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @pad_matmul_static_dispatch_0 {
builtin.module {
func.func @pad_matmul_static_dispatch_0(%arg0: tensor<250x500xf32>, %arg1: tensor<500x1020xf32>) -> tensor<250x1020xf32> {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<250x500xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<250x500xf32>> -> tensor<250x500xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<500x1020xf32>> -> tensor<500x1020xf32>
@@ -19,8 +17,8 @@
%cst = arith.constant 0.000000e+00 : f32
%5 = linalg.fill ins(%cst : f32) outs(%50 : tensor<250x1020xf32>) -> tensor<250x1020xf32>
// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32
- // CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64)
- // CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64)
+ // CHECK: %[[D0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64)
+ // CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64)
// CHECK: %[[D2:.+]] = flow.dispatch.tensor.load %[[D0]], offsets = [0, 0], sizes = [250, 500]
// CHECK: %[[D3:.+]] = flow.dispatch.tensor.load %[[D1]], offsets = [0, 0], sizes = [500, 1020]
// CHECK: %[[D4:.+]] = tensor.empty() : tensor<250x1020xf32>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir
index da19ad2..3e47fe8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_vector_distribution.mlir
@@ -6,16 +6,14 @@
// RUN: --allow-unregistered-dialect | \
// RUN: FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
#translation_info = #iree_codegen.translation_info<None workgroup_size = [64, 1, 1] subgroup_size = 32>
func.func @reduce_dispatch_0() attributes {translation_info = #translation_info} {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128xf32>
memref.assume_alignment %0, 64 : memref<128xf32>
%1 = gpu.thread_id x
%2 = arith.cmpi ult, %1, %c1 : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir
index 4102664..4056c42 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_distribute_forall.mlir
@@ -1,9 +1,7 @@
// RUN: iree-opt %s --pass-pipeline="builtin.module(iree-codegen-lower-executable-using-transform-dialect)" | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
#translation = #iree_codegen.translation_info<TransformDialectCodegen, { config_test = "config_test" }>
@@ -13,7 +11,7 @@
%c250 = arith.constant 250 : index
%c8 = arith.constant 8 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<2xf16>
memref.assume_alignment %0, 64 : memref<2xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%subview = memref.subview %0[%workgroup_id_x] [1] [1] : memref<2xf16> to memref<1xf16, strided<[1], offset: ?>>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir
index 57f8c82..4975c58 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_gpu_pipelining.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matmul_pipelining {
builtin.module {
@@ -21,11 +19,11 @@
%3 = gpu.thread_id z
%4 = memref.alloc() : memref<4x32x40xf16, 3>
%5 = memref.alloc() : memref<4x32x40xf16, 3>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<3456x2048xf16>
memref.assume_alignment %6, 64 : memref<3456x2048xf16>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<2048x1024xf16>
memref.assume_alignment %7, 64 : memref<2048x1024xf16>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<3456x1024xf16>
memref.assume_alignment %8, 64 : memref<3456x1024xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
index 2159e58..9fae267 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt %s --split-input-file -iree-transform-dialect-interpreter -transform-dialect-drop-schedule | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matmul {
builtin.module {
@@ -17,11 +15,11 @@
%c16 = arith.constant 16 : index
%c32 = arith.constant 32 : index
%cst_0 = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %0, 64 : memref<32x32xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %1, 64 : memref<32x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %2, 64 : memref<32x32xf32>
%3 = gpu.thread_id x
%4 = gpu.thread_id y
@@ -77,12 +75,10 @@
// -----
// Verify that unrolling does not apply to rank 1 elementwise vector ops.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @gathered_matmul {
builtin.module {
@@ -98,11 +94,11 @@
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
%cst_2 = arith.constant dense<1> : vector<4x4xindex>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %0, 64 : memref<32x32xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %1, 64 : memref<32x32xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf32>
memref.assume_alignment %2, 64 : memref<32x32xf32>
%alloc = memref.alloc() {alignment = 64 : i64} : memref<32x32xf32>
%3 = gpu.thread_id x
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir
index 09357d9..8aa8774 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transpose_pipeline_test.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_80 \
// RUN: --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target, fold-memref-alias-ops, canonicalize, cse)))))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_dispatch_0 {
@@ -18,8 +16,8 @@
builtin.module {
func.func @transpose_dispatch_0_generic_4096x4096() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>> -> tensor<4096x4096xf32>
%3 = tensor.empty() : tensor<4096x4096xf32>
%4 = linalg.generic {indexing_maps = [ affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%2 : tensor<4096x4096xf32>) outs(%3 : tensor<4096x4096xf32>) {
@@ -40,9 +38,9 @@
// CHECK-DAG: %[[D1:.*]] = gpu.thread_id y
// CHECK-DAG: %[[D2:.*]] = gpu.thread_id z
// CHECK-DAG: %[[D3:.*]] = memref.alloc() : memref<32x33xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D4]], 64 : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D5]], 64 : memref<4096x4096xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: gpu.barrier
// CHECK: %[[D6:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]]
@@ -61,12 +59,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_single_operand_dispatch_0_generic_768x2048 {
@@ -79,9 +75,9 @@
builtin.module {
func.func @transpose_single_operand_dispatch_0_generic_768x2048() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<768x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<768x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2048, 768], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x768xf32>> -> tensor<2048x768xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [768, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<768x2048xf32>> -> tensor<768x2048xf32>
%5 = tensor.empty() : tensor<768x2048xf32>
@@ -104,11 +100,11 @@
// CHECK: %[[D1:.*]] = gpu.thread_id y
// CHECK: %[[D2:.*]] = gpu.thread_id z
// CHECK: %[[D3:.*]] = memref.alloc() : memref<32x33xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<2048x768xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<2048x768xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D4]], 64 : memref<2048x768xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D5]], 64 : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D6]], 64 : memref<768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: gpu.barrier
// CHECK: %[[D7:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]]
@@ -129,12 +125,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_3d_no_dispatch_0_generic_768x2048x1024 {
@@ -147,9 +141,9 @@
builtin.module {
func.func @transpose_3d_no_dispatch_0_generic_768x2048x1024() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<768x2048x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<768x2048x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x2048x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2048, 768, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x768x1024xf32>> -> tensor<2048x768x1024xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [768, 2048, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<768x2048x1024xf32>> -> tensor<768x2048x1024xf32>
%5 = tensor.empty() : tensor<768x2048x1024xf32>
@@ -172,12 +166,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_3d_yes_dispatch_0_generic_10x768x2048 {
@@ -190,9 +182,9 @@
builtin.module {
func.func @transpose_3d_yes_dispatch_0_generic_10x768x2048() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x2048x768xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x768x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x2048x768xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x768x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 2048, 768], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x2048x768xf32>> -> tensor<10x2048x768xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>> -> tensor<10x768x2048xf32>
%5 = tensor.empty() : tensor<10x768x2048xf32>
@@ -215,11 +207,11 @@
// CHECK: %[[D1:.*]] = gpu.thread_id y
// CHECK: %[[D2:.*]] = gpu.thread_id z
// CHECK: %[[D3:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D4:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D4]], 64 : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D5]], 64 : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D6]], 64 : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: gpu.barrier
// CHECK: %[[D7:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]]
@@ -240,12 +232,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_3d_trans_out_dispatch_0_generic_10x2048x768 {
@@ -258,9 +248,9 @@
builtin.module {
func.func @transpose_3d_trans_out_dispatch_0_generic_10x2048x768() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x2048x768xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x2048x768xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>> -> tensor<10x768x2048xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [10, 768, 2048], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x768x2048xf32>> -> tensor<10x768x2048xf32>
%5 = tensor.empty() : tensor<10x2048x768xf32>
@@ -284,11 +274,11 @@
// CHECK: %[[D2:.*]] = gpu.thread_id z
// CHECK: %[[D3:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space<workgroup>>
// CHECK: %[[D4:.*]] = memref.alloc() : memref<1x32x33xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D5:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D5]], 64 : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D6:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]]) : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D6]], 64 : memref<10x768x2048xf32, #hal.descriptor_type<storage_buffer>>
-// CHECK: %[[D7:.*]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
+// CHECK: %[[D7:.*]] = hal.interface.binding.subspan layout({{.+}}) binding(2) alignment(64) offset(%[[C0]]) : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: memref.assume_alignment %[[D7]], 64 : memref<10x2048x768xf32, #hal.descriptor_type<storage_buffer>>
// CHECK: gpu.barrier
// CHECK: %[[D8:.*]] = affine.apply #{{.*}}()[%{{.*}}, %[[D0]], %[[D1]], %[[D2]]]
@@ -311,12 +301,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
hal.executable @transpose_3d_diff_dispatch_0_generic_10x768x2048 {
@@ -333,9 +321,9 @@
%c768 = arith.constant 768 : index
%c2048 = arith.constant 2048 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x2048x768xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768x10xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x768x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x2048x768xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2048x768x10xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x768x2048xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir
index f231c8b..857f4bd 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ukernel_pipeline_transform.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}>
#map = affine_map<(d0) -> (d0)>
@@ -21,9 +19,9 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
%10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8} -> tensor<?xf16>
%11 = tensor.empty() : tensor<i64>
%12 = tensor.empty() : tensor<f16>
@@ -49,11 +47,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -70,9 +66,9 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xi64>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xi64>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x?xf32>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x?xf32>>{%8}
%10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [16, %8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x?xf32>>{%8} -> tensor<16x?xf32>
%11 = tensor.empty() : tensor<16xi64>
%12 = tensor.empty() : tensor<16xf32>
@@ -100,11 +96,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
#map = affine_map<(d0) -> (d0)>
@@ -121,9 +115,9 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
%10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8} -> tensor<?xf16>
%11 = tensor.empty() : tensor<i64>
%12 = tensor.empty() : tensor<f16>
@@ -149,11 +143,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "argmax"}>
#map = affine_map<(d0) -> (d0)>
@@ -170,9 +162,9 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8}
%10 = flow.dispatch.tensor.load %9, offsets = [0], sizes = [%8], strides = [1] : !flow.dispatch.tensor<readonly:tensor<?xf16>>{%8} -> tensor<?xf16>
%11 = tensor.empty() : tensor<i64>
%12 = tensor.empty() : tensor<f16>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir
index fcb1192..4173142 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/winograd_pipeline_test.mlir
@@ -1,15 +1,13 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=gfx1100 --pass-pipeline="builtin.module(iree-llvmgpu-select-lowering-strategy, func.func(iree-llvmgpu-lower-executable-target))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_filter_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x64x128xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [3, 3, 64, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x64x128xf32>> -> tensor<3x3x64x128xf32>
%3 = tensor.empty() : tensor<8x8x64x128xf32>
%4 = iree_linalg_ext.winograd.filter_transform output_tile_size(6) kernel_size(3) kernel_dimensions([0, 1]) ins(%2 : tensor<3x3x64x128xf32>) outs(%3 : tensor<8x8x64x128xf32>) -> tensor<8x8x64x128xf32>
@@ -29,16 +27,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_input_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>> -> tensor<2x34x34x128xf16>
%3 = tensor.empty() : tensor<8x8x2x6x6x128xf16>
%4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16>
@@ -59,16 +55,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_output_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>> -> tensor<8x8x2x6x6x128xf16>
%3 = tensor.empty() : tensor<2x36x36x128xf16>
%4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
index a5c7df9..b785b3d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
@@ -117,7 +117,7 @@
/// Returns the (set, binding) pair for the given interface op.
static std::pair<uint32_t, uint32_t>
getInterfaceSetAndBinding(IREE::HAL::InterfaceBindingSubspanOp op) {
- return {op.getSet().getSExtValue(), op.getBinding().getSExtValue()};
+ return {0, op.getBinding().getSExtValue()};
}
/// Scans all hal.interface.binding.subspan ops in `module`, creates their
@@ -289,7 +289,7 @@
assert(exportOps.size() == 1);
auto layoutAttr = exportOps.front().getLayout();
- uint64_t elementCount = layoutAttr.getPushConstants();
+ uint64_t elementCount = layoutAttr.getConstants();
unsigned index = loadOp.getOrdinal().getZExtValue();
// The following function generates SPIR-V ops with i32 types. So it does
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
index 6861e81..0e2734b 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEmulateI64.cpp
@@ -61,10 +61,9 @@
auto newOp =
rewriter.replaceOpWithNewOp<IREE::HAL::InterfaceBindingSubspanOp>(
- op, newResultTy, adaptor.getLayout(), adaptor.getSet(),
- adaptor.getBinding(), adaptor.getByteOffset(),
- adaptor.getDynamicDims(), adaptor.getAlignmentAttr(),
- adaptor.getDescriptorFlagsAttr());
+ op, newResultTy, adaptor.getLayout(), adaptor.getBinding(),
+ adaptor.getByteOffset(), adaptor.getDynamicDims(),
+ adaptor.getAlignmentAttr(), adaptor.getDescriptorFlagsAttr());
LLVM_DEBUG(llvm::dbgs()
<< "WideIntegerEmulation: new op: " << newOp << "\n");
(void)newOp;
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
index 817b121..94a3dc2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVEraseStorageBufferStaticShape.cpp
@@ -50,7 +50,7 @@
/// e.g.,
///
/// ```mlir
-/// hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+/// hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
/// offset(%offset)
/// : memref<16xf32>
/// ```
@@ -58,7 +58,7 @@
/// is re-written to
///
/// ```mlir
-/// hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0)
+/// hal.interface.binding.subspan layout(#pipeline_layout) binding(0)
/// offset(%offset)
/// : memref<?xf32>{%c16}
/// ```
@@ -87,9 +87,8 @@
auto newOp = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
subspanOp.getLoc(), newType, subspanOp.getLayoutAttr(),
- subspanOp.getSetAttr(), subspanOp.getBindingAttr(),
- subspanOp.getByteOffset(), dynamicDims, subspanOp.getAlignmentAttr(),
- subspanOp.getDescriptorFlagsAttr());
+ subspanOp.getBindingAttr(), subspanOp.getByteOffset(), dynamicDims,
+ subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr());
LLVM_DEBUG({
llvm::dbgs() << "Rewritten to: ";
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
index 5934189..de4c7fa 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorizeLoadStore.cpp
@@ -649,10 +649,9 @@
"cannot get vectorized memref type");
}
rewriter.replaceOpWithNewOp<IREE::HAL::InterfaceBindingSubspanOp>(
- subspanOp, *vecMemRef, subspanOp.getLayout(), subspanOp.getSet(),
- subspanOp.getBinding(), subspanOp.getByteOffset(),
- subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(),
- subspanOp.getDescriptorFlagsAttr());
+ subspanOp, *vecMemRef, subspanOp.getLayout(), subspanOp.getBinding(),
+ subspanOp.getByteOffset(), subspanOp.getDynamicDims(),
+ subspanOp.getAlignmentAttr(), subspanOp.getDescriptorFlagsAttr());
return success();
}
};
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir
index 7796ffc..1f541ff 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/annotate_winograd_loops.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-annotate-winograd-loops))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @_wino_input_dispatch_0() {
%c0 = arith.constant 0 : index
@@ -16,8 +14,8 @@
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
%0 = tensor.empty() : tensor<8x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x10x10x1280xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x2x2x1280xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x10x10x1280xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x2x2x1280xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -68,9 +66,9 @@
// CHECK: %[[C1:.+]] = arith.constant 1 : index
// CHECK: %[[C32:.+]] = arith.constant 32 : index
// CHECK: %[[D0:.+]] = tensor.empty() : tensor<8x8xf32>
-// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[C0]])
+// CHECK: %[[D1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[C0]])
// CHECK-SAME: : !flow.dispatch.tensor<readonly:tensor<2x10x10x1280xf32>>
-// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[C0]])
+// CHECK: %[[D2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[C0]])
// CHECK-SAME: : !flow.dispatch.tensor<writeonly:tensor<8x8x2x2x2x1280xf32>>
// CHECK: %[[WORKGROUP_ID_X:.+]] = hal.interface.workgroup.id[0] : index
// CHECK: %[[WORKGROUP_COUNT_X:.+]] = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
index 4e6ea89..221790a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
@@ -2,21 +2,19 @@
// Conv - large OC - distribute to only one workgroup dimension.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_112x112x512() {
%c0 = arith.constant 0 : index
%c512 = arith.constant 512 : index
%c112 = arith.constant 112 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>> -> tensor<3x3x3x512xf32>
%5 = tensor.empty() : tensor<1x112x112x512xf32>
@@ -37,21 +35,19 @@
// Conv - medium OC/OW/OH - distribute to two workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_112x112x32() {
%c0 = arith.constant 0 : index
%c32 = arith.constant 32 : index
%c112 = arith.constant 112 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>> -> tensor<3x3x3x32xf32>
%5 = tensor.empty() : tensor<1x112x112x32xf32>
@@ -72,20 +68,18 @@
// Conv - small OC/OW/OH - distribute to all three workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_16x16x16() {
%c0 = arith.constant 0 : index
%c16 = arith.constant 16 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x16x16x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x16x16x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>> -> tensor<1x33x33x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>> -> tensor<3x3x3x16xf32>
%5 = tensor.empty() : tensor<1x16x16x16xf32>
@@ -105,21 +99,19 @@
// Depthwise conv - small OC/OW/OH - distribute to all three workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dwconv_28x28x144() {
%c0 = arith.constant 0 : index
%c144 = arith.constant 144 : index
%c28 = arith.constant 28 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x144xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x144xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 57, 57, 144], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>> -> tensor<1x57x57x144xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>> -> tensor<3x3x144xf32>
%5 = tensor.empty() : tensor<1x28x28x144xf32>
@@ -140,21 +132,19 @@
// Depthwise conv - tiny OC/OW/OH - starving the GPU.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dwconv_4x4x8() {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x9x9x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x4x4x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x9x9x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x4x4x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 9, 9, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x9x9x8xf32>> -> tensor<1x9x9x8xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>> -> tensor<3x3x8xf32>
%5 = tensor.empty() : tensor<1x4x4x8xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
index 60a9e65..08d7676 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
@@ -2,21 +2,19 @@
// Large matmul that can match the best tiling scheme.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1024x2048x512() {
%c0 = arith.constant 0 : index
%c2048 = arith.constant 2048 : index
%c1024 = arith.constant 1024 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>> -> tensor<512x2048xf32>
%5 = tensor.empty() : tensor<1024x2048xf32>
@@ -37,21 +35,19 @@
// Small matmul N that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_3136x24x96() {
%c0 = arith.constant 0 : index
%c24 = arith.constant 24 : index
%c3136 = arith.constant 3136 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<96x24xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3136x24xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<96x24xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3136x24xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3136, 96], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>> -> tensor<3136x96xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<96x24xf32>> -> tensor<96x24xf32>
%5 = tensor.empty() : tensor<3136x24xf32>
@@ -72,21 +68,19 @@
// Small matmul M that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_196x64x192() {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c196 = arith.constant 196 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x192xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<192x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x192xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<192x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x64xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 192], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<196x192xf32>> -> tensor<196x192xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<192x64xf32>> -> tensor<192x64xf32>
%5 = tensor.empty() : tensor<196x64xf32>
@@ -107,21 +101,19 @@
// Small matmul K that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_12544x96x16() {
%c0 = arith.constant 0 : index
%c96 = arith.constant 96 : index
%c12544 = arith.constant 12544 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<12544x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x96xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<12544x96xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<12544x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x96xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<12544x96xf32>
linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>)
linalg.matmul {__internal_linalg_transform__ = "workgroup"} ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>)
return
@@ -138,21 +130,19 @@
// Odd matmul M and small N that cannot utilize all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_49x160x576() {
%c0 = arith.constant 0 : index
%c160 = arith.constant 160 : index
%c49 = arith.constant 49 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<49x576xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<576x160xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<49x160xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<49x576xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<576x160xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<49x160xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [49, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<49x576xf32>> -> tensor<49x576xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<576x160xf32>> -> tensor<576x160xf32>
%5 = tensor.empty() : tensor<49x160xf32>
@@ -173,21 +163,19 @@
// Large batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_4x384x384() {
%c0 = arith.constant 0 : index
%c384 = arith.constant 384 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x384x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x384x384xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 384, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>> -> tensor<4x384x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>> -> tensor<4x32x384xf32>
%5 = tensor.empty() : tensor<4x384x384xf32>
@@ -208,21 +196,19 @@
// Small batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_4x8x8() {
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x8x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x8x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x8x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x8x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 8, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x8x32xf32>> -> tensor<4x8x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>> -> tensor<4x32x8xf32>
%5 = tensor.empty() : tensor<4x8x8xf32>
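
The hunks above all apply the same mechanical rewrite: the single-element `sets = [#hal.descriptor_set.layout<0, ...>]` wrapper collapses into a flat `bindings` list, binding ordinals become positional, and `set(0)` disappears from `hal.interface.binding.subspan`. A minimal sketch of that pattern is below; the function name, value names, and tensor shapes are illustrative only and are not taken from any test in this patch.

```mlir
// Before this change the same layout was spelled with an explicit set:
//   #hal.pipeline.layout<push_constants = 0, sets = [
//     #hal.descriptor_set.layout<0, bindings = [
//       #hal.descriptor_set.binding<0, storage_buffer>,
//       #hal.descriptor_set.binding<1, storage_buffer>
//     ]>
//   ]>
// After it, bindings are listed flat and each binding's ordinal is its position.
#pipeline_layout = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer>,
  #hal.pipeline.binding<storage_buffer>
]>

func.func @binding_example() {
  %c0 = arith.constant 0 : index
  // hal.interface.binding.subspan keeps binding(N) but no longer takes set(0).
  %in = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16xf32>>
  %out = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
  return
}
```
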
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
index 2fa2c4d..e499af8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_conv.mlir
@@ -1,21 +1,19 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @nhwc_conv_pointwise_2x64x64x320() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x66x66x320xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x320x320xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x64x320xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x64x64x320xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x66x66x320xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x320x320xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x64x320xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x64x64x320xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 66, 66, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x66x66x320xf16>> -> tensor<2x66x66x320xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 320, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x320x320xf16>> -> tensor<3x3x320x320xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 64, 64, 320], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64x64x320xf16>> -> tensor<2x64x64x320xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
index b986068..37bd863 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir
@@ -1,17 +1,15 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_f32_16x4096x40x4096() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x40xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x4096x40xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x40xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x4096x40xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>> -> tensor<16x4096x4096xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 4096, 40], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4096x40xf32>> -> tensor<16x4096x40xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 4096, 40], strides = [1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x4096x40xf32>> -> tensor<16x4096x40xf32>
@@ -30,19 +28,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_f16_64x640x320() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x320xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<320x640xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x640xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x320xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<320x640xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64x640xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 320], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x320xf16>> -> tensor<64x320xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [320, 640], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<320x640xf16>> -> tensor<320x640xf16>
%5 = tensor.empty() : tensor<64x640xf16>
@@ -61,18 +57,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_f32_16x4096x40x4096() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x48xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x4096x48xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x48xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x4096x48xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf32>> -> tensor<16x4096x4096xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 4096, 48], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4096x48xf32>> -> tensor<16x4096x48xf32>
%5 = tensor.empty() : tensor<16x4096x48xf32>
@@ -91,20 +85,18 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @batch_matmul_f16_1x4096x4096x512() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x4096x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x512x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x4096x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x4096x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x512x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x4096x4096xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 4096, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096x512xf16>> -> tensor<1x4096x512xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 512, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x512x4096xf16>> -> tensor<1x512x4096xf16>
%5 = tensor.empty() : tensor<1x4096x4096xf32>
@@ -129,13 +121,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -154,11 +144,11 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>>
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x32x128xf32>>
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<512x11008xf32>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>>
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x32x128xf32>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<512x11008xf32>>
%15 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [11008, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>> -> tensor<11008x32x128xi4>
%16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>> -> tensor<11008x32xf32>
%17 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32xf32>> -> tensor<11008x32xf32>
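
Where a layout also carries constant data, only the key changes: `push_constants = N, sets = [...]` becomes `constants = N, bindings = [...]`, and the count N stays the same in every updated layout. A condensed sketch under the same caveat (the count of 2, the names, and the shapes here are illustrative, not from the tests):

```mlir
#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
  #hal.pipeline.binding<storage_buffer>,
  #hal.pipeline.binding<storage_buffer>
]>

func.func @constants_example() {
  %c0 = arith.constant 0 : index
  // Bindings are still addressed by ordinal; only the set qualifier is gone.
  %src = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<64xf32>>
  %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<64xf32>>
  return
}
```
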
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
index c07fe95..c3755fb 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul_cooperative_ops.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=rdna3@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @matmul_256x1024x128_div_add() {
@@ -15,11 +13,11 @@
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%7 = tensor.empty() : tensor<256x1024xf16>
@@ -47,22 +45,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @batch_matmul_16x128x256x512_div() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>> -> tensor<16x128x512xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>> -> tensor<16x512x256xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>> -> tensor<16x128x256xf16>
@@ -89,12 +85,10 @@
// Linalg.generic that is a batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -105,9 +99,9 @@
func.func @generic_batch_matmul_32x8x512x64() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>> -> tensor<128x32x64xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>> -> tensor<32x64x512xf16>
%5 = tensor.empty() : tensor<32x128x512xf16>
@@ -133,19 +127,17 @@
// K dim size not divisible by 32.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_16x1024x1024x80() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x1024x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x1024x1024xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 1024, 80], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>> -> tensor<16x1024x80xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 80, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>> -> tensor<16x80x1024xf16>
%5 = tensor.empty() : tensor<16x1024x1024xf16>
@@ -166,21 +158,19 @@
// Small K - not supported by cooperative matrix.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_256x1024x8() {
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x8xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x8xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x8xf16>> -> tensor<256x8xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>> -> tensor<8x1024xf16>
%5 = tensor.empty() : tensor<256x1024xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir
index fcf5341..f00e7b5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -15,11 +13,11 @@
#map3 = affine_map<(d0, d1, d2) -> (d0)>
func.func @i4_dequant_matvec_f32() {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
@@ -54,12 +52,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
@@ -70,11 +66,11 @@
%c32_i64 = arith.constant 32 : i64
%cst = arith.constant 0.000000e+00 : f32
%c4294967296_i64 = arith.constant 4294967296 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x1x32x128xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x1x32x128xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x128xi4>> -> tensor<4096x32x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 32, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>> -> tensor<4096x32x1xf32>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 32, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x32x1xf32>> -> tensor<4096x32x1xf32>
@@ -109,13 +105,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -152,12 +146,12 @@
%24 = arith.shli %23, %c32_i64 : i64
%25 = arith.ori %22, %24 : i64
%26 = arith.index_castui %25 : i64 to index
- %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
%30 = flow.dispatch.workload.ordinal %26, 0 : index
- %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x86x128xf32>>{%30}
- %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
+ %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x86x128xf32>>{%30}
+ %32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
%33 = flow.dispatch.tensor.load %27, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%34 = flow.dispatch.tensor.load %28, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
%35 = flow.dispatch.tensor.load %29, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
@@ -192,14 +186,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
@@ -209,11 +201,11 @@
func.func @i4_dequant_matvec_f16() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x86x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x86x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>> -> tensor<4096x86x1xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>> -> tensor<4096x86x1xf16>
@@ -248,14 +240,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -292,12 +282,12 @@
%24 = arith.shli %23, %c32_i64 : i64
%25 = arith.ori %22, %24 : i64
%26 = arith.index_castui %25 : i64 to index
- %27 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %28 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %29 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %27 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %28 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%10) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %29 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%11) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
%30 = flow.dispatch.workload.ordinal %26, 0 : index
- %31 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x86x128xf32>>{%30}
- %32 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
+ %31 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%16) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x86x128xf32>>{%30}
+ %32 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%21) : !flow.dispatch.tensor<writeonly:tensor<?x4096xf32>>{%30}
%33 = flow.dispatch.tensor.load %27, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%34 = flow.dispatch.tensor.load %28, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
%35 = flow.dispatch.tensor.load %29, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
@@ -332,12 +322,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 7, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 7, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -368,12 +356,12 @@
%17 = arith.shli %16, %c32_i64 : i64
%18 = arith.ori %15, %17 : i64
%19 = arith.index_castui %18 : i64 to index
- %20 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>>
- %21 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>>
- %22 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>>
+ %20 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>>
+ %21 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>>
+ %22 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%9) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>>
%23 = flow.dispatch.workload.ordinal %19, 0 : index
- %24 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x32x128xf16>>{%23}
- %25 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%14) : !flow.dispatch.tensor<writeonly:tensor<?x11008xf16>>{%23}
+ %24 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x32x128xf16>>{%23}
+ %25 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%14) : !flow.dispatch.tensor<writeonly:tensor<?x11008xf16>>{%23}
%26 = flow.dispatch.tensor.load %20, offsets = [0, 0, 0], sizes = [11008, 32, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32x128xi4>> -> tensor<11008x32x128xi4>
%27 = flow.dispatch.tensor.load %21, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>> -> tensor<11008x32xf16>
%28 = flow.dispatch.tensor.load %22, offsets = [0, 0], sizes = [11008, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<11008x32xf16>> -> tensor<11008x32xf16>
@@ -408,12 +396,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dynamic_batch_matvec() {
%c32_i64 = arith.constant 32 : i64
@@ -428,11 +414,11 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<32x1x128xf16>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%7) : !flow.dispatch.tensor<writeonly:tensor<32x1x128xf16>>
%11 = flow.dispatch.workload.ordinal %8, 0 : index
%12 = flow.dispatch.workload.ordinal %9, 1 : index
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11}
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12}
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11}
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12}
%15 = flow.dispatch.tensor.load %13, offsets = [0, 0, 0], sizes = [32, 1, %11], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x1x?xf16>>{%11} -> tensor<32x1x?xf16>
%16 = flow.dispatch.tensor.load %14, offsets = [0, 0, 0], sizes = [32, %12, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?x128xf16>>{%12} -> tensor<32x?x128xf16>
%17 = tensor.empty() : tensor<32x1x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
index 88f9f72..aa9ae5e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_conv.mlir
@@ -2,13 +2,11 @@
// Convolution with consumer pointwise ops.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @nhwc_conv_pointwise_112x112x32() {
@@ -16,10 +14,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c112 = arith.constant 112 : index
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 112, 112, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>> -> tensor<1x112x112x32xf32>
%5 = tensor.empty() : tensor<1x112x112x32xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
@@ -45,18 +43,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @nchw_conv_2x1280x8x8() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x1280x10x10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x1280x8x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x1280x10x10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x1280x8x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 1280, 10, 10], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1280x10x10xf32>> -> tensor<2x1280x10x10xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [1280, 1280, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1280x1280x3x3xf32>> -> tensor<1280x1280x3x3xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0], sizes = [2, 1280, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<2x1280x8x8xf32>> -> tensor<2x1280x8x8xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
index 29d8e04..7f0702c 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ext_ops.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_1d_sort() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<1000xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<1000xi32>>
%1 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1000], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<1000xi32>> -> tensor<1000xi32>
%2 = iree_linalg_ext.sort dimension(0) outs(%1 : tensor<1000xi32>) {
^bb0(%arg0: i32, %arg1: i32):
@@ -29,19 +27,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @static_3d_sort() {
%c64 = arith.constant 64 : index
%c128 = arith.constant 128 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x32x128xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x32x128xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x32x128xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x32x128xi32>
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : memref<64x32x128xi32>) outs(%1 : memref<64x32x128xi32>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
@@ -63,19 +59,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_1d_fft_stage2() {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32>
%cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32>
@@ -93,11 +87,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_3d_fft_stage3() {
%c0 = arith.constant 0 : index
@@ -109,8 +101,8 @@
%cst_0 = arith.constant dense<[-0.000000e+00, -0.707106769, -1.000000e+00, -0.707106769]> : tensor<4xf32>
%0 = bufferization.to_memref %cst_0 : memref<4xf32>
%1 = bufferization.to_memref %cst : memref<4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x128x32xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<64x128x32xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x128x32xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<64x128x32xf32>
iree_linalg_ext.fft ins(%c3, %1, %0 : index, memref<4xf32>, memref<4xf32>) outs(%2, %3 : memref<64x128x32xf32>, memref<64x128x32xf32>)
return
}
@@ -124,16 +116,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_input_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x8x2x6x6x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 34, 34, 128], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x34x34x128xf16>> -> tensor<2x34x34x128xf16>
%3 = tensor.empty() : tensor<8x8x2x6x6x128xf16>
%4 = iree_linalg_ext.winograd.input_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<2x34x34x128xf16>) outs(%3 : tensor<8x8x2x6x6x128xf16>) -> tensor<8x8x2x6x6x128xf16>
@@ -150,16 +140,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @winograd_output_transform() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x36x36x128xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 8, 2, 6, 6, 128], strides = [1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x8x2x6x6x128xf16>> -> tensor<8x8x2x6x6x128xf16>
%3 = tensor.empty() : tensor<2x36x36x128xf16>
%4 = iree_linalg_ext.winograd.output_transform output_tile_size(6) kernel_size(3) image_dimensions([1, 2]) ins(%2 : tensor<8x8x2x6x6x128xf16>) outs(%3 : tensor<2x36x36x128xf16>) -> tensor<2x36x36x128xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
index 13f86b7..dcec345 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -18,8 +16,8 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xi32>{%0, %1}
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xi32>{%0, %1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xi32>{%0, %1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xi32>{%0, %1}
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%2 : memref<?x?xi32>) outs(%3 : memref<?x?xi32>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
@@ -35,11 +33,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -53,8 +49,8 @@
%c0 = arith.constant 0 : index
%c224 = arith.constant 224 : index
%c3 = arith.constant 3 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x224x224x3xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<1x224x224x3xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x224x224x3xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<1x224x224x3xf32>
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : memref<1x224x224x3xf32>) outs(%1 : memref<1x224x224x3xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
@@ -73,11 +69,9 @@
// Average pooling op with nice tilable input.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -91,8 +85,8 @@
%cst = arith.constant 0.000000e+00 : f32
%c2 = arith.constant 2 : index
%c8 = arith.constant 8 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x24x24x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x24x24x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x8xf32>>
%2 = tensor.empty() : tensor<12x12xf32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 24, 24, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x24x24x8xf32>> -> tensor<1x24x24x8xf32>
%4 = tensor.empty() : tensor<1x2x2x8xf32>
@@ -111,11 +105,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -129,8 +121,8 @@
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 4.900000e+01 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x7x7x1280xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x1280xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x7x7x1280xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x1x1280xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 7, 7, 1280], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x7x7x1280xf32>> -> tensor<1x7x7x1280xf32>
%3 = tensor.empty() : tensor<7x7xf32>
%4 = tensor.empty() : tensor<1x1x1x1280xf32>
@@ -156,11 +148,9 @@
// Max pooling op with odd size-1 dimension sizes.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -175,8 +165,8 @@
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c320 = arith.constant 320 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x76x1x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x38x1x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x76x1x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x38x1x1xf32>>
%2 = tensor.empty() : tensor<2x1xf32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 76, 1, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x76x1x1xf32>> -> tensor<1x76x1x1xf32>
%4 = tensor.empty() : tensor<1x38x1x1xf32>
@@ -197,12 +187,10 @@
// Element wise op with mismatched input and output rank.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -217,9 +205,9 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<10xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<10xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<10xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 10], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x10xf32>> -> tensor<1x10xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [10], strides = [1] : !flow.dispatch.tensor<readonly:tensor<10xf32>> -> tensor<10xf32>
%5 = tensor.empty() : tensor<10xf32>
@@ -240,11 +228,9 @@
// Fused depthwise convolution and element wise ops: don't vectorize with partially active subgroups.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -263,8 +249,8 @@
%c4 = arith.constant 4 : index
%c4576 = arith.constant 4576 : index
%c6272 = arith.constant 6272 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x21x20x1xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x19x18x1x4xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x21x20x1xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<1x19x18x1x4xf32>>
%2 = tensor.empty() : tensor<1x19x18x1x4xf32>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 21, 20, 1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x21x20x1xf32>> -> tensor<1x21x20x1xf32>
%4 = tensor.empty() : tensor<1x19x18x1x4xf32>
@@ -289,11 +275,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -307,8 +291,8 @@
func.func @outermost_reduction() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x2048x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x2048x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 2048, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x2048x512xf32>> -> tensor<4x2048x512xf32>
%3 = tensor.empty() : tensor<2048x512xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2048x512xf32>) -> tensor<2048x512xf32>
@@ -330,11 +314,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -353,9 +335,9 @@
%3 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 394752 : index, 984064 : index]} : i32 to index
%4 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [0 : index, 196608 : index, 197120 : index]} : i32 to index
%5 = arith.index_cast %2 {stream.alignment = 512 : index, stream.values = [512 : index, 197120 : index, 197632 : index]} : i32 to index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%3) : !flow.dispatch.tensor<readonly:tensor<128x384xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%4) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%5) : !flow.dispatch.tensor<writeonly:tensor<128xf32>>
%9 = flow.dispatch.tensor.load %6, offsets = [0, 0], sizes = [128, 384], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x384xf32>> -> tensor<128x384xf32>
%10 = flow.dispatch.tensor.load %7, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
%11 = tensor.empty() : tensor<128xf32>
@@ -380,11 +362,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -397,8 +377,8 @@
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
func.func @four_dim_elementwise() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x8x256x4xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256x4x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x8x256x4xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256x4x8xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [128, 8, 256, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x8x256x4xf32>> -> tensor<128x8x256x4xf32>
%3 = tensor.empty() : tensor<128x256x4x8xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<128x8x256x4xf32>) outs(%3 : tensor<128x256x4x8xf32>) {
@@ -418,11 +398,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -436,8 +414,8 @@
func.func @odd_reduction_dimension_size_501() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0xFF800000 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x501xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x501xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x501xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x501xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 501], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x501xf32>> -> tensor<512x501xf32>
%3 = tensor.empty() : tensor<512x501xf32>
%4 = tensor.empty() : tensor<512xf32>
@@ -466,11 +444,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -484,8 +460,8 @@
func.func @odd_reduction_dimension_size_2809() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0xFF800000 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x2809xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x2809xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x2809xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x2809xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 2809], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x2809xf32>> -> tensor<512x2809xf32>
%3 = tensor.empty() : tensor<512x2809xf32>
%4 = tensor.empty() : tensor<512xf32>
@@ -514,11 +490,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -533,8 +507,8 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 1.000000e-10 : f32
%cst_0 = arith.constant -1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<f32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x1x1x1xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<f32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2048x1x1x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32>
%3 = tensor.empty() : tensor<2048x1x1x1xf32>
%4 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<f32>) outs(%3 : tensor<2048x1x1x1xf32>) {
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
index 9370b6c..79df8d2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
@@ -2,12 +2,10 @@
// Odd K that forbids vectorization.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -22,9 +20,9 @@
%c3 = arith.constant 3 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x3x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x3x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x3x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x3x32xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [1, 3, 3], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x3x3xf32>> -> tensor<1x3x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 3, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x3x32xf32>> -> tensor<1x3x32xf32>
%5 = tensor.empty() : tensor<1x3x32xf32>
@@ -45,12 +43,10 @@
// 8-bit integers can be vectorized.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -64,9 +60,9 @@
%c16 = arith.constant 16 : index
%c64 = arith.constant 64 : index
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x16xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x16xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x16xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x16xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xi8>> -> tensor<64x32xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x16xi8>> -> tensor<32x16xi8>
%5 = tensor.empty() : tensor<64x16xi32>
@@ -87,12 +83,10 @@
// Vectorize non-32 bit types.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -106,9 +100,9 @@
%c16 = arith.constant 16 : index
%c64 = arith.constant 64 : index
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xi64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x16xi64>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x16xi64>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xi64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x16xi64>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x16xi64>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xi64>> -> tensor<64x32xi64>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x16xi64>> -> tensor<32x16xi64>
%5 = tensor.empty() : tensor<64x16xi64>
@@ -129,12 +123,10 @@
// Odd N that forbids vectorization.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -151,10 +143,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c400 = arith.constant 400 : index
%c273 = arith.constant 273 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c11775744) : !flow.dispatch.tensor<readonly:tensor<273xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<400x576xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<576x273xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<400x273xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c11775744) : !flow.dispatch.tensor<readonly:tensor<273xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<400x576xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<576x273xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<400x273xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [273], strides = [1] : !flow.dispatch.tensor<readonly:tensor<273xf32>> -> tensor<273xf32>
%5 = tensor.empty() : tensor<400x273xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [400, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<400x576xf32>> -> tensor<400x576xf32>
@@ -182,12 +174,10 @@
// Odd M and non-4-multiplier N
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -204,10 +194,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c25 = arith.constant 25 : index
%c546 = arith.constant 546 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c15842560) : !flow.dispatch.tensor<readonly:tensor<546xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<25x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x546xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<25x546xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c15842560) : !flow.dispatch.tensor<readonly:tensor<546xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<25x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x546xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<25x546xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [546], strides = [1] : !flow.dispatch.tensor<readonly:tensor<546xf32>> -> tensor<546xf32>
%5 = tensor.empty() : tensor<25x546xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [25, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<25x512xf32>> -> tensor<25x512xf32>
@@ -235,14 +225,12 @@
// Matmul with consumer pointwise ops
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -257,11 +245,11 @@
%cst = arith.constant 0.000000e+00 : f16
%c256 = arith.constant 256 : index
%c1024 = arith.constant 1024 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%7 = tensor.empty() : tensor<256x1024xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir
index 8deaa11..965c7dc 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir
@@ -1,22 +1,20 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d1)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @complex_view_as_real() {
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x50xcomplex<f32>>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x32x50x2xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x50x2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x50xcomplex<f32>>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x32x50x2xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x50x2xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1xi32>> -> tensor<1xi32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x32x50x2xf32>> -> tensor<1x1x32x50x2xf32>
%6 = tensor.empty() : tensor<32x50x2xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
index 0f31e2a..5fadf17 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_reduction.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -18,8 +16,8 @@
func.func @subgroup_reduce_f32() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x512xf32>> -> tensor<2x512xf32>
%3 = tensor.empty() : tensor<2xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32>
@@ -41,11 +39,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -59,8 +55,8 @@
func.func @subgroup_reduce_f16() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x4096x4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x4096x4096xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 4096, 4096], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x4096x4096xf16>> -> tensor<16x4096x4096xf16>
%3 = tensor.empty() : tensor<16x4096x4096xf16>
%4 = tensor.empty() : tensor<16x4096xf16>
@@ -88,11 +84,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1], [0, 64]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -116,9 +110,9 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8xf32>>
%8 = flow.dispatch.workload.ordinal %6, 0 : index
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x?xf32>>{%8}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<8x?xf32>>{%8}
%10 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [8, %8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x?xf32>>{%8} -> tensor<8x?xf32>
%11 = tensor.empty() : tensor<8xf32>
%12 = linalg.fill {lowering_config = #config} ins(%cst : f32) outs(%11 : tensor<8xf32>) -> tensor<8xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir
index fefcfe0..f2bdec2 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_sub_byte_types.mlir
@@ -1,21 +1,19 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d0)>
func.func @i4_dequant() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<131072x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<131072x128xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [131072, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>> -> tensor<131072x128xi4>
%5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor<readonly:tensor<131072xf32>> -> tensor<131072xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor<readonly:tensor<131072xf32>> -> tensor<131072xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
index 8ae533f..78a057e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
@@ -2,21 +2,19 @@
// Conv - large OC - distribute to only one workgroup dimension.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_112x112x512() {
%c0 = arith.constant 0 : index
%c512 = arith.constant 512 : index
%c112 = arith.constant 112 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 512], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x512xf32>> -> tensor<3x3x3x512xf32>
%5 = tensor.empty() : tensor<1x112x112x512xf32>
@@ -37,21 +35,19 @@
// Conv - medium OC/OW/OH - distribute to two workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_112x112x32() {
%c0 = arith.constant 0 : index
%c32 = arith.constant 32 : index
%c112 = arith.constant 112 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 225, 225, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x225x225x3xf32>> -> tensor<1x225x225x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 32], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>> -> tensor<3x3x3x32xf32>
%5 = tensor.empty() : tensor<1x112x112x32xf32>
@@ -72,20 +68,18 @@
// Conv - small OC/OW/OH - distribute to all three workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @conv_16x16x16() {
%c0 = arith.constant 0 : index
%c16 = arith.constant 16 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x16x16x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x16x16x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 33, 33, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x33x33x3xf32>> -> tensor<1x33x33x3xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [3, 3, 3, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x3x16xf32>> -> tensor<3x3x3x16xf32>
%5 = tensor.empty() : tensor<1x16x16x16xf32>
@@ -106,21 +100,19 @@
// Depthwise conv - small OC/OW/OH - distribute to all three workgroup dimensions.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dwconv_28x28x144() {
%c0 = arith.constant 0 : index
%c144 = arith.constant 144 : index
%c28 = arith.constant 28 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x144xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x28x28x144xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [0, 57, 57, 144], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x57x57x144xf32>> -> tensor<1x57x57x144xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 144], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x144xf32>> -> tensor<3x3x144xf32>
%5 = tensor.empty() : tensor<1x28x28x144xf32>
@@ -141,12 +133,10 @@
// Depthwise conv - tiny OC/OW/OH - starving the GPU.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @dwconv_1x2x8() {
%c0 = arith.constant 0 : index
@@ -154,9 +144,9 @@
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x3x5x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x2x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1x3x5x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1x1x2x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [1, 3, 5, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x3x5x8xf32>> -> tensor<1x3x5x8xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [3, 3, 8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x3x8xf32>> -> tensor<3x3x8xf32>
%5 = tensor.empty() : tensor<1x1x2x8xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
index 5f30177..7facf67 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
@@ -2,21 +2,19 @@
// Large matmul that can match the best tiling scheme.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1024x2048x512() {
%c0 = arith.constant 0 : index
%c2048 = arith.constant 2048 : index
%c1024 = arith.constant 1024 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x2048xf32>> -> tensor<512x2048xf32>
%5 = tensor.empty() : tensor<1024x2048xf32>
@@ -37,21 +35,19 @@
// Small matmul N that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_3136x24x96() {
%c0 = arith.constant 0 : index
%c24 = arith.constant 24 : index
%c3136 = arith.constant 3136 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<96x24xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3136x24xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<96x24xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<3136x24xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [3136, 96], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3136x96xf32>> -> tensor<3136x96xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [96, 24], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<96x24xf32>> -> tensor<96x24xf32>
%5 = tensor.empty() : tensor<3136x24xf32>
@@ -72,21 +68,19 @@
// Small matmul M that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_196x64x192() {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c196 = arith.constant 196 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x192xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<192x64xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x64xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<196x192xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<192x64xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<196x64xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [196, 192], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<196x192xf32>> -> tensor<196x192xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [192, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<192x64xf32>> -> tensor<192x64xf32>
%5 = tensor.empty() : tensor<196x64xf32>
@@ -107,21 +101,19 @@
// Small matmul K that can still tile to all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_12544x96x16() {
%c0 = arith.constant 0 : index
%c96 = arith.constant 96 : index
%c12544 = arith.constant 12544 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<12544x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x96xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<12544x96xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<12544x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x96xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<12544x96xf32>
linalg.fill ins(%cst : f32) outs(%2 : memref<12544x96xf32>)
linalg.matmul ins(%0, %1 : memref<12544x16xf32>, memref<16x96xf32>) outs(%2 : memref<12544x96xf32>)
return
@@ -138,21 +130,19 @@
// Odd matmul M and small N that cannot utilize all threads in a workgroup.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_49x160x576() {
%c0 = arith.constant 0 : index
%c160 = arith.constant 160 : index
%c49 = arith.constant 49 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<49x576xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<576x160xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<49x160xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<49x576xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<576x160xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<49x160xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [49, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<49x576xf32>> -> tensor<49x576xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 160], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<576x160xf32>> -> tensor<576x160xf32>
%5 = tensor.empty() : tensor<49x160xf32>
@@ -173,12 +163,10 @@
// Small matmul M to "shift" parallelism to N.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_2x1024x576() {
%cst = arith.constant 0.000000e+00 : f32
@@ -189,10 +177,10 @@
%c3436864 = arith.constant 3436864 : index
%c10141312 = arith.constant 10141312 : index
%c2304 = arith.constant 2304 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x576xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c3436864) : !flow.dispatch.tensor<readonly:tensor<576x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c10141312) : !flow.dispatch.tensor<readonly:tensor<2x1024xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x576xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c3436864) : !flow.dispatch.tensor<readonly:tensor<576x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c10141312) : !flow.dispatch.tensor<readonly:tensor<2x1024xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1024xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 576], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x576xf32>> -> tensor<2x576xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [576, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<576x1024xf32>> -> tensor<576x1024xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1024xf32>> -> tensor<2x1024xf32>
@@ -214,21 +202,19 @@
// Large matmul with i8 inputs.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1024x2048x512xi8() {
%c0 = arith.constant 0 : index
%c2048 = arith.constant 2048 : index
%c1024 = arith.constant 1024 : index
%c0_i32 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xi8>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xi8>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xi32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x512xi8>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x2048xi8>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x2048xi32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512xi8>> -> tensor<1024x512xi8>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x2048xi8>> -> tensor<512x2048xi8>
%5 = tensor.empty() : tensor<1024x2048xi32>
@@ -240,21 +226,19 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_4x384x384() {
%c0 = arith.constant 0 : index
%c384 = arith.constant 384 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x384x384xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x384x384xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 384, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x384x32xf32>> -> tensor<4x384x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 384], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x32x384xf32>> -> tensor<4x32x384xf32>
%5 = tensor.empty() : tensor<4x384x384xf32>
@@ -275,12 +259,10 @@
// Small batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_4x2x8() {
%c0 = arith.constant 0 : index
@@ -288,9 +270,9 @@
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x2x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x2x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x2x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x2x8xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4, 2, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x2x32xf32>> -> tensor<4x2x32xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4, 32, 8], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x32x8xf32>> -> tensor<4x32x8xf32>
%5 = tensor.empty() : tensor<4x2x8xf32>
@@ -311,12 +293,10 @@
// Linalg.generic that is a batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -327,9 +307,9 @@
func.func @generic_batch_matmul_32x2x512() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x32x64xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x8x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x32x64xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x8x512xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x32x64xf32>> -> tensor<8x32x64xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x64x512xf32>> -> tensor<32x64x512xf32>
%5 = tensor.empty() : tensor<32x8x512xf32>
@@ -355,13 +335,11 @@
// Linalg.generic that is a batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d3, d2)>
@@ -372,11 +350,11 @@
%c537247744 = arith.constant 537247744 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c168607744) : !flow.dispatch.tensor<readonly:tensor<8x2500x4608xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4608x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c537247744) : !flow.dispatch.tensor<readonly:tensor<8x2500x512xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x2500x512xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x2500x512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c168607744) : !flow.dispatch.tensor<readonly:tensor<8x2500x4608xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4608x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c537247744) : !flow.dispatch.tensor<readonly:tensor<8x2500x512xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<8x2500x512xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x2500x512xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [8, 2500, 4608], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x2500x4608xf32>> -> tensor<8x2500x4608xf32>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4608, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4608x512xf32>> -> tensor<4608x512xf32>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [8, 2500, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<8x2500x512xf32>> -> tensor<8x2500x512xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
index 4c3f060..8d4f3f0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_4x4096x9216() {
%c36864 = arith.constant 36864 : index
@@ -13,10 +11,10 @@
%c209920 = arith.constant 209920 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x9216xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor<readonly:tensor<4x4096xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor<writeonly:tensor<4x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<4x9216xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor<readonly:tensor<4x4096xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor<writeonly:tensor<4x4096xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 9216], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x9216xf32>> -> tensor<4x9216xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [9216, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>> -> tensor<9216x4096xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4096xf32>> -> tensor<4x4096xf32>
@@ -36,12 +34,10 @@
// Matvec does not go down matmul pipelines.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_1x4096x9216() {
%c36864 = arith.constant 36864 : index
@@ -49,10 +45,10 @@
%c209920 = arith.constant 209920 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x9216xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor<readonly:tensor<1x4096xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor<writeonly:tensor<1x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x9216xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c209920) : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c667974912) : !flow.dispatch.tensor<readonly:tensor<1x4096xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c36864) : !flow.dispatch.tensor<writeonly:tensor<1x4096xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1, 9216], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x9216xf32>> -> tensor<1x9216xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [9216, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<9216x4096xf32>> -> tensor<9216x4096xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [1, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1x4096xf32>> -> tensor<1x4096xf32>
@@ -72,12 +68,10 @@
// Multi-reduction-dimension transposed-B matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>
@@ -85,9 +79,9 @@
func.func @multi_reduction_transposed_b_matmul() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x86x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x86x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>> -> tensor<4096x86x128xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [2048, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x86x128xf32>> -> tensor<2048x86x128xf32>
%5 = tensor.empty() : tensor<4096x2048xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index db27e00..b818edd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
@@ -2,14 +2,12 @@
// RUN: --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass)' %s | \
// RUN: FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @matmul_256x1024x128_div_add() {
@@ -17,11 +15,11 @@
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%7 = tensor.empty() : tensor<256x1024xf16>
@@ -49,22 +47,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @batch_matmul_16x128x256x512_div() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>> -> tensor<16x128x512xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>> -> tensor<16x512x256xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>> -> tensor<16x128x256xf16>
@@ -91,12 +87,10 @@
// Linalg.generic that is a batch matmul.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
@@ -107,9 +101,9 @@
func.func @generic_batch_matmul_32x8x512x64() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 32, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<128x32x64xf16>> -> tensor<128x32x64xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [32, 64, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x64x512xf16>> -> tensor<32x64x512xf16>
%5 = tensor.empty() : tensor<32x128x512xf16>
@@ -135,19 +129,17 @@
// K dim size not divisible by 32.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @batch_matmul_16x1024x1024x80() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x1024x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x1024x1024xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 1024, 80], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x1024x80xf16>> -> tensor<16x1024x80xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 80, 1024], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x80x1024xf16>> -> tensor<16x80x1024xf16>
%5 = tensor.empty() : tensor<16x1024x1024xf16>
@@ -168,21 +160,19 @@
// Small K - not supported by cooperative matrix.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_256x1024x8() {
%c0 = arith.constant 0 : index
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x8xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x8xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x8xf16>> -> tensor<256x8xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [8, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<8x1024xf16>> -> tensor<8x1024xf16>
%5 = tensor.empty() : tensor<256x1024xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir
index f42b1c1..ed93869 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_user.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(iree-codegen-materialize-user-configs, iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[128, 256], [16, 16]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize workgroup_size = [16, 8, 1] subgroup_size = 64>
@@ -15,9 +13,9 @@
%c128 = arith.constant 128 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<128x1024xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf32>> -> tensor<256x1024xf32>
%5 = tensor.empty() : tensor<128x1024xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir
index eca6f4a..73f09e6 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir
@@ -6,7 +6,9 @@
wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8, subgroup = shuffle|arithmetic, dot = dp4xi8toi32, mma = [<WMMA_F32_16x16x16_F16>, <WMMA_F16_16x16x16_F16>],
subgroup_size_choices = [32, 64], max_workgroup_sizes = [1024, 1024, 1024], max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536,
max_workgroup_counts = [2147483647, 2147483647, 2147483647]>>}>) {
- hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>]>
+ ) {
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
index 9fcdab1..b437006 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv))))' %s | FileCheck %s
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv{index-bits=64}))))' %s | FileCheck %s --check-prefix=INDEX64
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @push_constant {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -38,14 +36,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<3, bindings = [
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @resource_bindings_in_same_func {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -54,10 +48,10 @@
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>} {
// CHECK-LABEL: spirv.module
- // CHECK: spirv.GlobalVariable @[[ARG0:.+]] bind(1, 2) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[ARG1_0:.+]] bind(1, 3) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[ARG1_1:.+]] bind(1, 3) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[RET0:.+]] bind(3, 4) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[ARG0:.+]] bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[ARG1_0:.+]] bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[ARG1_1:.+]] bind(0, 1) {aliased} : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[RET0:.+]] bind(0, 2) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
// CHECK: spirv.func @resource_bindings_in_same_entry_func()
func.func @resource_bindings_in_same_entry_func() -> f32 {
%c0 = arith.constant 0 : index
@@ -65,17 +59,17 @@
// Same type
// CHECK: spirv.mlir.addressof @[[ARG0]]
// CHECK: spirv.mlir.addressof @[[ARG0]]
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
// Different type
// CHECK: spirv.mlir.addressof @[[ARG1_0]]
// CHECK: spirv.mlir.addressof @[[ARG1_1]]
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(3) : memref<4xvector<4xf32>, #spirv.storage_class<StorageBuffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4xvector<4xf32>, #spirv.storage_class<StorageBuffer>>
// CHECK: spirv.mlir.addressof @[[RET0]]
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
%5 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
%6 = memref.load %1[%c0, %c0] : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
@@ -99,13 +93,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<3, bindings = [
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @resource_bindings_in_multi_entry_func {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -117,18 +107,18 @@
}
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>} {
// CHECK-LABEL: spirv.module
- // CHECK: spirv.GlobalVariable @[[FUNC1_ARG:.+]] bind(1, 2) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[FUNC1_RET:.+]] bind(3, 4) : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[FUNC2_ARG:.+]] bind(1, 2) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
- // CHECK: spirv.GlobalVariable @[[FUNC2_RET:.+]] bind(3, 4) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[FUNC1_ARG:.+]] bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[FUNC1_RET:.+]] bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.array<4 x vector<4xf32>, stride=16> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[FUNC2_ARG:.+]] bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
+ // CHECK: spirv.GlobalVariable @[[FUNC2_RET:.+]] bind(0, 1) : !spirv.ptr<!spirv.struct<(!spirv.array<16 x f32, stride=4> [0])>, StorageBuffer>
// CHECK: spirv.func @resource_bindings_in_entry_func1()
func.func @resource_bindings_in_entry_func1() -> f32 {
// CHECK: spirv.mlir.addressof @[[FUNC1_ARG]]
// CHECK: spirv.mlir.addressof @[[FUNC1_RET]]
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4xvector<4xf32>, #spirv.storage_class<StorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4xvector<4xf32>, #spirv.storage_class<StorageBuffer>>
%2 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
%3 = memref.load %1[%c0] : memref<4xvector<4xf32>, #spirv.storage_class<StorageBuffer>>
@@ -144,8 +134,8 @@
// CHECK: spirv.mlir.addressof @[[FUNC2_ARG]]
// CHECK: spirv.mlir.addressof @[[FUNC2_RET]]
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>> // Same type as previous function
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(3) binding(4) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>> // Different type as previous function
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>> // Same type as previous function
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4x4xf32, #spirv.storage_class<StorageBuffer>> // Different type as previous function
%2 = memref.load %0[%c0, %c0] : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
%3 = memref.load %1[%c0, %c0] : memref<4x4xf32, #spirv.storage_class<StorageBuffer>>
@@ -160,12 +150,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_binding {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -175,9 +163,9 @@
builtin.module attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Int64, Shader], []>, #spirv.resource_limits<>>} {
func.func @interface_binding() -> f32 {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x5xf32, #spirv.storage_class<StorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xf32, #spirv.storage_class<StorageBuffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8x5xf32, #spirv.storage_class<StorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x5xf32, #spirv.storage_class<StorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xf32, #spirv.storage_class<StorageBuffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8x5xf32, #spirv.storage_class<StorageBuffer>>
%3 = memref.load %0[%c0, %c0] : memref<8x5xf32, #spirv.storage_class<StorageBuffer>>
%4 = memref.load %1[%c0] : memref<5xf32, #spirv.storage_class<StorageBuffer>>
@@ -205,12 +193,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_wg_id {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -240,12 +226,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_wg_size {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -257,7 +241,7 @@
%c0 = arith.constant 0.0 : f32
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_y = hal.interface.workgroup.size[1] : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x64xf32, #spirv.storage_class<StorageBuffer>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x64xf32, #spirv.storage_class<StorageBuffer>>
memref.store %c0, %subspan[%workgroup_size_x, %workgroup_size_y] : memref<64x64xf32, #spirv.storage_class<StorageBuffer>>
return
}
@@ -278,12 +262,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_wg_count {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
index eb1c281..2840572 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir
@@ -2,12 +2,10 @@
// RUN: --pass-pipeline='builtin.module(func.func(iree-spirv-emulate-i64))' %s | \
// RUN: FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -19,9 +17,9 @@
func.func @buffer_types() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%c1_i64 = arith.constant 1 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
%3 = memref.load %0[%c0] : memref<8xi32, #spirv.storage_class<StorageBuffer>>
%4 = memref.load %1[%c0] : memref<8xi64, #spirv.storage_class<StorageBuffer>>
%5 = arith.addi %4, %c1_i64 : i64
@@ -32,8 +30,8 @@
// Check that without the Int64 capability emulation produces expected i32 ops.
//
// CHECK-LABEL: func.func @buffer_types
-// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
-// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
+// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
+// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
// CHECK: [[VI64:%.+]] = memref.load [[REF_I64_0]][{{%.+}}] : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
// CHECK: {{%.+}} = arith.addui_extended {{%.+}}, {{%.+}} : i32, i1
// CHECK: memref.store {{%.+}}, [[REF_I64_1]][{{%.+}}] : memref<8xvector<2xi32>, #spirv.storage_class<StorageBuffer>>
@@ -41,11 +39,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -61,9 +57,9 @@
%c36864 = arith.constant 36864 : index
%c1523712 = arith.constant 1523712 : index
%c96 = arith.constant 96 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c96}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c1523712) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c36864}
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c36864}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c96}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c1523712) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c36864}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<?xvector<4xi32>, #spirv.storage_class<StorageBuffer>>{%c36864}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%thread_id_x = gpu.thread_id x
@@ -94,12 +90,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -111,9 +105,9 @@
func.func @no_emulation() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%c1_i64 = arith.constant 1 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
%3 = memref.load %0[%c0] : memref<8xi32, #spirv.storage_class<StorageBuffer>>
%4 = memref.load %1[%c0] : memref<8xi64, #spirv.storage_class<StorageBuffer>>
%5 = arith.addi %4, %c1_i64 : i64
@@ -125,9 +119,9 @@
//
// CHECK-LABEL: func.func @no_emulation
// CHECK: [[CST1:%.+]] = arith.constant 1 : i64
-// CHECK: [[REF_I32:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
-// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
-// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+// CHECK: [[REF_I32:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<8xi32, #spirv.storage_class<StorageBuffer>>
+// CHECK: [[REF_I64_0:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
+// CHECK: [[REF_I64_1:%.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) : memref<8xi64, #spirv.storage_class<StorageBuffer>>
// CHECK: [[VI32:%.+]] = memref.load [[REF_I32]][{{%.+}}] : memref<8xi32, #spirv.storage_class<StorageBuffer>>
// CHECK: [[VI64:%.+]] = memref.load [[REF_I64_0]][{{%.+}}] : memref<8xi64, #spirv.storage_class<StorageBuffer>>
// CHECK: {{%.+}} = arith.addi {{%.+}} : i64
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir
index aa25417..6031f46 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/erase_storage_buffer_static_shape.mlir
@@ -1,14 +1,12 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-spirv-erase-storage-buffer-static-shape))" %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @storage_buffer_load_store(%offset: index, %i0: index, %i1: index) {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
%val = memref.load %0[%i0] : memref<256xf32, #hal.descriptor_type<storage_buffer>>
memref.store %val, %1[%i1] : memref<256xf32, #hal.descriptor_type<storage_buffer>>
return
@@ -17,8 +15,8 @@
// CHECK-LABEL: func.func @storage_buffer_load_store
// CHECK-SAME: (%[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[I1:.+]]: index)
// CHECK: %[[C256:.+]] = arith.constant 256 : index
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%[[OFFSET]]) flags(ReadOnly) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C256]]}
-// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) alignment(64) offset(%[[OFFSET]]) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C256]]}
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%[[OFFSET]]) flags(ReadOnly) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C256]]}
+// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) alignment(64) offset(%[[OFFSET]]) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C256]]}
// CHECK: %[[LD:.+]] = memref.load %[[SPAN0]][%[[I0]]]
// CHECK: memref.store %[[LD]], %[[SPAN1]][%[[I1]]]
@@ -26,51 +24,45 @@
// Test that we don't rewrite memref for uniform buffers.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @uniform_buffer_load(%offset: index, %i0: index) -> f32 {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<uniform_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<uniform_buffer>>
%val = memref.load %0[%i0] : memref<256xf32, #hal.descriptor_type<uniform_buffer>>
return %val : f32
}
// CHECK-LABEL: func.func @uniform_buffer_load
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<uniform_buffer>>
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<uniform_buffer>>
// CHECK: memref.load %[[SPAN0]]
// -----
// Test that we don't rewrite memref without HAL descriptor types.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<uniform_buffer>
]>
func.func @uniform_buffer_load(%offset: index, %i0: index) -> f32 {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32>
%val = memref.load %0[%i0] : memref<256xf32>
return %val : f32
}
// CHECK-LABEL: func.func @uniform_buffer_load
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32>
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) flags(ReadOnly) : memref<256xf32>
// CHECK: memref.load %[[SPAN0]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @storage_buffer_transfer_read_write(%offset: index, %i0: index, %i1: index) {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) flags(ReadOnly) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%offset) : memref<256xf32, #hal.descriptor_type<storage_buffer>>
%f0 = arith.constant 0.0 : f32
%val = vector.transfer_read %0[%i0], %f0 {in_bounds = [true]} : memref<256xf32, #hal.descriptor_type<storage_buffer>>, vector<4xf32>
vector.transfer_write %val, %1[%i1] {in_bounds = [true]} : vector<4xf32>, memref<256xf32, #hal.descriptor_type<storage_buffer>>
@@ -83,14 +75,12 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @storage_buffer_subview(%offset : index, %i0: index, %i1: index) -> f32 {
%c0 = arith.constant 0 : index
- %subspan = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
+ %subspan = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview = memref.subview %subspan[%i0][16][1] : memref<128xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<16xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%value = memref.load %subview[%c0] : memref<16xf32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
return %value : f32
@@ -101,18 +91,16 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @storage_buffer_cast(%offset: index) -> memref<?xf32, #hal.descriptor_type<storage_buffer>> {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%offset) : memref<16xf32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%offset) : memref<16xf32, #hal.descriptor_type<storage_buffer>>
%1 = memref.cast %0 : memref<16xf32, #hal.descriptor_type<storage_buffer>> to memref<?xf32, #hal.descriptor_type<storage_buffer>>
return %1 : memref<?xf32, #hal.descriptor_type<storage_buffer>>
}
// CHECK-LABEL: func.func @storage_buffer_cast
// CHECK: %[[C16:.+]] = arith.constant 16 : index
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) alignment(64) offset(%{{.+}}) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C16]]}
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) alignment(64) offset(%{{.+}}) : memref<?xf32, #hal.descriptor_type<storage_buffer>>{%[[C16]]}
// CHECK: return %[[SPAN0]]
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
index 681cf99..6127e84 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/illegal_configuration.mlir
@@ -2,12 +2,10 @@
// RUN: --pass-pipeline='builtin.module(iree-codegen-materialize-user-configs, iree-spirv-select-lowering-strategy-pass)' \
// RUN: --verify-diagnostics --split-input-file %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = []>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -21,9 +19,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x8xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<8x16xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x16xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x8xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<8x16xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x16xf32>
// expected-error @+1 {{expected 1 levels of tiling sizes, got 0}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<4x8xf32>, memref<8x16xf32>) outs(%2 : memref<4x16xf32>)
return
@@ -31,12 +29,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64], [4, 4], [0, 0, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -51,21 +47,19 @@
// expected-error @+1 {{expected workgroup size to have three dimensions for SPIR-V pipelines}}
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32>
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>)
return
}
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64], [4, 4], [0, 0, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -79,9 +73,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32>
// expected-error @+1 {{expected workgroup size dimensions not exceeding [128, 128, 64]}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>)
return
@@ -89,12 +83,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64], [4, 2], [0, 0, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -108,9 +100,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32>
// expected-error @+1 {{expected total invocation count in workgroup to be <= 128}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>)
return
@@ -118,12 +110,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64], [16, 8], [0, 0, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -137,9 +127,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32>
// expected-error @+1 {{expected total workgroup size to be multiple of 32}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>)
return
@@ -147,12 +137,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 60], [4, 4], [0, 0, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -166,9 +154,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x128xf32>
// expected-error @+1 {{expected each workgroup size dimension to be power of two}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x128xf32>) outs(%2 : memref<64x128xf32>)
return
@@ -176,12 +164,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -195,9 +181,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<48x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x128xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<48x128xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<48x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x128xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<48x128xf32>
// expected-error @+1 {{LHS shape is indivisible by first level tile size}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<48x16xf32>, memref<16x128xf32>) outs(%2 : memref<48x128xf32>)
return
@@ -205,12 +191,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 64, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -224,9 +208,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<64x16xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<16x80xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<64x80xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<64x16xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<16x80xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<64x80xf32>
// expected-error @+1 {{RHS shape is indivisible by first level tile size}}
linalg.matmul {compilation_info = #compilation} ins(%0, %1 : memref<64x16xf32>, memref<16x80xf32>) outs(%2 : memref<64x80xf32>)
return
@@ -234,12 +218,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [32, 32], [0, 0, 16]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -255,9 +237,9 @@
func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xf16>> -> tensor<64x32xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128xf16>> -> tensor<32x128xf16>
%5 = tensor.empty() : tensor<64x128xf16>
@@ -270,12 +252,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [32, 32], [0, 0, 16], [8, 8, 8]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -291,9 +271,9 @@
func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xf16>> -> tensor<64x32xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128xf16>> -> tensor<32x128xf16>
%5 = tensor.empty() : tensor<64x128xf16>
@@ -306,12 +286,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32], [8, 8], [0, 0, 4], [16, 16, 16]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -327,9 +305,9 @@
func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xf16>> -> tensor<64x32xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128xf16>> -> tensor<32x128xf16>
%5 = tensor.empty() : tensor<64x128xf16>
@@ -342,12 +320,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [32, 32], [0, 0, 16], [16, 16, 16]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -363,9 +339,9 @@
func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xf16>> -> tensor<64x32xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128xf16>> -> tensor<32x128xf16>
%5 = tensor.empty() : tensor<64x128xf16>
@@ -378,12 +354,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 64], [32, 32], [0, 0, 16], [16, 16, 16]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -399,9 +373,9 @@
func.func @matmul_tensor() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<64x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<32x128xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<64x128xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [64, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x32xf16>> -> tensor<64x32xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128xf16>> -> tensor<32x128xf16>
%5 = tensor.empty() : tensor<64x128xf16>
@@ -414,12 +388,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 2], [0, 0, 0, 0, 1, 1, 4]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -439,9 +411,9 @@
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -474,12 +446,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 6, 6, 16], [0, 3, 3, 2], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -499,9 +469,9 @@
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -534,12 +504,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -559,9 +527,9 @@
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -594,12 +562,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 7, 64], [0, 1, 7, 2], [0, 0, 0, 0, 5, 5], [0, 1, 0, 0]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -613,9 +579,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32>
// expected-error @+1 {{expected tile sizes for KH and KW to be 1}}
linalg.depthwise_conv_2d_nhwc_hwc {compilation_info = #compilation} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>)
return
@@ -623,12 +589,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 7, 64], [0, 1, 7, 2], [0, 0, 0, 0, 1, 1], [0, 0, 1, 1]]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -642,9 +606,9 @@
#compilation = #iree_codegen.compilation_info<lowering_config = #config, translation_info = #translation>
func.func @illegal() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<1x11x11x576xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5x5x576xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<1x7x7x576xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<1x11x11x576xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5x5x576xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<1x7x7x576xf32>
// expected-error @+1 {{expected the fourth level of tile size to be [0, 1, 0, 0]}}
linalg.depthwise_conv_2d_nhwc_hwc {compilation_info = #compilation} ins(%0, %1 : memref<1x11x11x576xf32>, memref<5x5x576xf32>) outs(%2 : memref<1x7x7x576xf32>)
return
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir
index bee6557..4ca2219 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/link_executables.mlir
@@ -8,11 +8,9 @@
#vulkan_target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_0 {
@@ -94,9 +92,9 @@
%dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@spirv::@dispatch_0) : index
%dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
return
}
util.initializer {
@@ -111,9 +109,9 @@
%dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@spirv::@dispatch_0) : index
%dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
util.return
}
@@ -165,8 +163,8 @@
// CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_1)
// CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_2)
// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
//
// CHECK: util.initializer
// CHECK-DAG: %[[DISPATCH_0_EXE:.+]] = hal.executable.lookup device(%{{.+}}) executable(@link_executables_linked_spirv) : !hal.executable
@@ -176,8 +174,8 @@
// CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_1)
// CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_spirv::@vulkan_spirv_fb::@dispatch_2)
// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
// -----
@@ -193,11 +191,9 @@
#vulkan_target_1 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.spirv.features = ["vulkan-spirv", "subgroup=1"]}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_0 {
@@ -304,10 +300,10 @@
%dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@spirv::@dispatch_1) : index
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@spirv::@dispatch_2) : index
%dispatch_3_ordinal = hal.executable.export.ordinal target(@dispatch_3::@spirv::@dispatch_3) : index
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_3_exe : !hal.executable)[%dispatch_3_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_3_exe : !hal.executable)[%dispatch_3_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
return
}
@@ -387,11 +383,9 @@
#vulkan_target_2 = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.spirv.features = ["vulkan-spirv", "subgroup=2"]}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_0 {
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir
index 5828901..ca050b5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_fusion.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-codegen-spirv-configuration-pipeline, func.func(iree-spirv-lower-executable-target-pass))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 128, 1, 32]]>
#map = affine_map<()[s0] -> (s0 * 32)>
@@ -24,11 +22,11 @@
%c128 = arith.constant 128 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128x2048xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xi4>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128x2048xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x2048xi4>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<4096x2048xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%5 = affine.apply #map()[%workgroup_id_y]
@@ -71,9 +69,9 @@
// CHECK-LABEL: func.func @matmul_i4_quant_weight()
// CHECK: %[[A_ALLOC:.+]] = memref.alloc() : memref<32x1x36xf32, #gpu.address_space<workgroup>>
// CHECK: %[[B_ALLOC:.+]] = memref.alloc() : memref<1x32x132xf32, #gpu.address_space<workgroup>>
-// CHECK: %[[WEIGHT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[SCALE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK: %[[ZP_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[WEIGHT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[SCALE_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK: %[[ZP_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for %arg0 = %c0 to %c86 step %c1 iter_args({{.+}}) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>)
// CHECK: %[[SCALE0:.+]] = vector.transfer_read %[[SCALE_BINDING]]
// CHECK: %[[SCALE1:.+]] = vector.transfer_read %[[SCALE_BINDING]]
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir
index 274669d..d3ebf52 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matmul_promotion.mlir
@@ -6,13 +6,11 @@
// Verify pipelining + multi-buffering.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#compilation = #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 16]]>,
@@ -29,10 +27,10 @@
func.func @matmul_f32_128x256x64() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x512xf32>> -> tensor<128x512xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x256xf32>> -> tensor<512x256xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
@@ -94,13 +92,11 @@
// Store in stage 0 of pipeline.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#compilation = #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 64, 16]]>,
@@ -117,10 +113,10 @@
func.func @matmul_f32_128x256x64() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x512xf32>> -> tensor<128x512xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x256xf32>> -> tensor<512x256xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
@@ -195,13 +191,11 @@
// Check that fused transposed consumer elementwise op does not cause extra workgroup memory allocations.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#compilation = #iree_codegen.compilation_info<
lowering_config = #iree_codegen.lowering_config<tile_sizes = [[64, 256, 32]]>,
@@ -217,10 +211,10 @@
func.func @matmul_f16_4096x512x512() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>> -> tensor<4096x512xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x512xf16>> -> tensor<512x512xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf16>> -> tensor<512xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir
index 0948ba9..24c7b74 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_matvec.mlir
@@ -1,13 +1,11 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=cdna2@vulkan --pass-pipeline='builtin.module(iree-spirv-select-lowering-strategy-pass, func.func(iree-spirv-lower-executable-target-pass))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
@@ -15,11 +13,11 @@
#map3 = affine_map<(d0, d1, d2) -> (d0)>
func.func @i4_dequant_matvec_f32() {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<86x128xf32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<4096xf32>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf32>> -> tensor<4096x86xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
index 6d4d163..9511790 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_reduction.mlir
@@ -2,11 +2,9 @@
// RUN: --pass-pipeline='builtin.module(func.func(iree-codegen-decompose-softmax), iree-spirv-select-lowering-strategy-pass, func.func(iree-spirv-lower-executable-target-pass))' \
// RUN: %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -21,8 +19,8 @@
%c0 = arith.constant 0 : index
%c10240 = arith.constant 10240 : index
%cst = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<writeonly:tensor<512xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [512, 10240], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x10240xf32>> -> tensor<512x10240xf32>
%3 = tensor.empty() : tensor<512xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<512xf32>) -> tensor<512xf32>
@@ -91,11 +89,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -109,8 +105,8 @@
func.func @warp_reduction_dispatch() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<10x9216x9216xf16>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [10, 9216, 9216], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<10x9216x9216xf16>> -> tensor<10x9216x9216xf16>
%3 = tensor.empty() : tensor<10x9216x9216xf16>
%4 = tensor.empty() : tensor<10x9216xf16>
@@ -150,8 +146,8 @@
// CHECK-DAG: %[[WGIDY:.+]] = hal.interface.workgroup.id[1] : index
// CHECK-DAG: %[[TIDX:.+]] = gpu.thread_id x
-// CHECK-DAG: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: gpu.barrier
// CHECK: %{{.+}}, %{{.+}} = gpu.shuffle xor %{{.+}}, %[[I1]], %[[I32]] : i32
@@ -175,11 +171,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -193,8 +187,8 @@
%cst = arith.constant -3.40282347E+38 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<12x128x40960xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [12, 128, 40960], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<12x128x40960xf32>> -> tensor<12x128x40960xf32>
%3 = tensor.empty() : tensor<12x128x40960xf32>
%4 = linalg.softmax dimension(2) ins(%2 : tensor<12x128x40960xf32>) outs(%3 : tensor<12x128x40960xf32>) -> tensor<12x128x40960xf32>
@@ -283,11 +277,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
iree.gpu.target = #iree_gpu.target<arch = "", features = "spirv:v1.6,cap:Shader", wgp = <
@@ -306,8 +298,8 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%6}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?xf16>>{%6}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%6}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x?xf16>>{%6}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [32, %6], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x?xf16>>{%6} -> tensor<32x?xf16>
%11 = tensor.empty(%6) : tensor<32x?xf16>
%12 = linalg.softmax dimension(1) ins(%10 : tensor<32x?xf16>) outs(%11 : tensor<32x?xf16>) -> tensor<32x?xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
index 3cd1cc8..8aee761 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/lowering_scalar_dispatch.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-spirv-select-lowering-strategy-pass, func.func(iree-spirv-lower-executable-target-pass)))))' -mlir-print-local-scope %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @scalar_dispatch {
hal.executable.variant public @vulkan_spirv_fb target(#hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -18,8 +16,8 @@
%c0 = arith.constant 0 : index
%c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64
%c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<i64>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<i64>>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i64>> -> tensor<i64>
%extracted = tensor.extract %2[] : tensor<i64>
%3 = arith.muli %extracted, %c6364136223846793005_i64 : i64
@@ -34,8 +32,8 @@
// CHECK: func.func @scalar_dispatch()
// CHECK-SAME: translation_info = #iree_codegen.translation_info<SPIRVBaseLowering workgroup_size = [1, 1, 1]>
-// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[SPAN0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[SPAN1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK: memref.load %[[SPAN0]][] : memref<i64, #hal.descriptor_type<storage_buffer>>
// CHECK: arith.muli {{.+}} : i64
// CHECK: arith.addi {{.+}} : i64
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir
index cef2338..d302ed4 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/materialize_executable_conditions.mlir
@@ -1,19 +1,15 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-spirv-materialize-executable-conditions)))' --mlir-print-local-scope %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
-#indirect_pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ], flags = Indirect>
+#indirect_pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_executable {
@@ -21,10 +17,10 @@
// CHECK-SAME: target(<"vulkan-spirv", "vulkan-spirv-fb", {iree.spirv.features = ["vulkan-spirv"]}>)
// CHECK-NOT: hal.executable.condition
hal.executable.variant public @test_assumed_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader, GroupNonUniform], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader, GroupNonUniform], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_assumed_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -54,10 +50,10 @@
// CHECK-NEXT: hal.return %[[RESULT]] : i1
// CHECK-NEXT: }
hal.executable.variant public @test_subgroup_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [GroupNonUniformShuffle, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [GroupNonUniformShuffle, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_subgroup_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -87,10 +83,10 @@
// CHECK-NEXT: hal.return %[[RESULT]] : i1
// CHECK-NEXT: }
hal.executable.variant public @test_8bit_storage_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [UniformAndStorageBuffer8BitAccess, StorageBuffer8BitAccess], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [UniformAndStorageBuffer8BitAccess, StorageBuffer8BitAccess], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_8bit_storage_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -121,10 +117,10 @@
// CHECK-NEXT: hal.return %[[RESULT]] : i1
// CHECK-NEXT: }
hal.executable.variant public @test_16bit_storage_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [StorageBuffer16BitAccess, StorageUniform16], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [StorageBuffer16BitAccess, StorageUniform16], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_16bit_storage_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -147,10 +143,10 @@
// CHECK: %[[TARGET:.+]] = arith.constant 7 : i32
// CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
hal.executable.variant public @test_int_compute_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Int64, Int16, Int8], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Int64, Int16, Int8], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_int_compute_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -172,10 +168,10 @@
// CHECK: %[[TARGET:.+]] = arith.constant 3 : i32
// CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
hal.executable.variant public @test_float_compute_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Float16, Float64], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Float16, Float64], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_float_compute_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -197,10 +193,10 @@
// CHECK: %[[TARGET:.+]] = arith.constant 1 : i32
// CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
hal.executable.variant public @test_dot_product_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [DotProduct, DotProductInput4x8Bit], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [DotProduct, DotProductInput4x8Bit], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_dot_product_capabilities ordinal(0) layout(#pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
@@ -222,10 +218,10 @@
// CHECK: %[[TARGET:.+]] = arith.constant 1 : i32
// CHECK: %{{.+}} = arith.andi %[[V]], %[[TARGET]] : i32
hal.executable.variant public @test_cooperative_matrix_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [CooperativeMatrixKHR], []>, #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [CooperativeMatrixKHR], []>, #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_cooperative_matrix_capabilities ordinal(0) layout(#pipeline_layout) attributes {
iree.spirv.coopmatrix.shape = array<i64: 16, 16, 16>, iree.spirv.coopmatrix.type = [f16, f16]
} {
@@ -254,13 +250,13 @@
// CHECK: %[[TARGET1:.+]] = arith.constant 1 : i32
// CHECK: %{{.+}} = arith.andi %[[V1]], %[[TARGET1]] : i32
hal.executable.variant public @test_address_capabilities target(
- #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb-ptr", {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.5,
- [Int64, PhysicalStorageBufferAddresses],
- [SPV_KHR_physical_storage_buffer]>,
- #spirv.resource_limits<>>
- }>
- ) {
+ #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb-ptr", {
+ spirv.target_env = #spirv.target_env<#spirv.vce<v1.5,
+ [Int64, PhysicalStorageBufferAddresses],
+ [SPV_KHR_physical_storage_buffer]>,
+ #spirv.resource_limits<>>
+ }>
+ ) {
hal.executable.export public @test_address_capabilities ordinal(0) layout(#indirect_pipeline_layout) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir
index 65cf1e5..b95b1b8 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/physical_storage_buffer_addresses.mlir
@@ -2,12 +2,10 @@
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-convert-to-spirv{index-bits=64}))))' \
// RUN: %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ], flags = Indirect>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @interface_binding {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb-ptr">) {
@@ -21,9 +19,9 @@
} {
func.func @interface_binding() -> f32 {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<4x5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<4x5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
%3 = memref.load %0[%c0, %c0] : memref<8x5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
%4 = memref.load %1[%c0] : memref<5xf32, #spirv.storage_class<PhysicalStorageBuffer>>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
index ed932b8..254d09d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_cooperative_ops.mlir
@@ -6,14 +6,12 @@
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline, canonicalize, cse)))' \
// RUN: %s | FileCheck %s --check-prefix=RDNA3
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @matmul_256x1024x128_div_exp {
@@ -29,11 +27,11 @@
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<256x128xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<readonly:tensor<128x1024xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) : !flow.dispatch.tensor<writeonly:tensor<256x1024xf16>>
%11 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%14 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x1024xf16>> -> tensor<256x1024xf16>
%17 = tensor.empty() : tensor<256x1024xf16>
@@ -196,13 +194,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @batch_matmul_16x128x256x512_div {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -215,10 +211,10 @@
func.func @batch_matmul_16x128x256x512_div() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16x128x256xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [16, 128, 512], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x512xf16>> -> tensor<16x128x512xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [16, 512, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x512x256xf16>> -> tensor<16x512x256xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [16, 128, 256], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<16x128x256xf16>> -> tensor<16x128x256xf16>
@@ -301,13 +297,11 @@
// Small matmul that each subgroup only handles one tile
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @matmul_32x32x32_div {
@@ -321,10 +315,10 @@
func.func @matmul_32x32x32_div() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x32xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x32xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf16>> -> tensor<32x32xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf16>> -> tensor<32x32xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [32, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x32xf16>> -> tensor<32x32xf16>
@@ -355,12 +349,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable public @generic_batch_matmul_32x128x512x64 {
@@ -374,9 +366,9 @@
func.func @generic_batch_matmul_32x128x512x64() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x128x64xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x512xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x128x64xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<64x512xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x128x512xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [32, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<32x128x64xf16>> -> tensor<32x128x64xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [64, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x512xf16>> -> tensor<64x512xf16>
%5 = tensor.empty() : tensor<32x128x512xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir
index 046c289..9e28eb0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_promotion.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -21,10 +19,10 @@
func.func @matmul_f32_128x256x64() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x512xf32>> -> tensor<128x512xf32>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x256xf32>> -> tensor<512x256xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf32>> -> tensor<128x256xf32>
@@ -75,13 +73,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
@@ -96,10 +92,10 @@
func.func @matmul_f16_128x256x64() {
%cst = arith.constant 0.0 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x512xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<512x256xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<128x256xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x256xf16>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x512xf16>> -> tensor<128x512xf16>
%5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x256xf16>> -> tensor<512x256xf16>
%6 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xf16>> -> tensor<128x256xf16>
@@ -153,12 +149,10 @@
// Check scalar load/store for promotion to shared memory.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#user_config = #iree_codegen.compilation_info<
@@ -176,9 +170,9 @@
func.func @matmul_f16_32x1280x1280() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x1280xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x1280xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32x1280xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x1280xf16>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [32, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<32x1280xf16>> -> tensor<32x1280xf16>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1280, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1280x1280xf16>> -> tensor<1280x1280xf16>
%5 = tensor.empty() : tensor<32x1280xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index f9a186c..503781c 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=valhall1 --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @fuse_and_vectorize_fill_matmul {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -19,9 +17,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%c4096 = arith.constant 4096 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
%8 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>> -> tensor<4096x4096xf32>
%10 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4096, 4096], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>> -> tensor<4096x4096xf32>
%15 = tensor.empty() : tensor<4096x4096xf32>
@@ -44,13 +42,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @fuse_and_vectorize_matmul_add {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -65,10 +61,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c1024 = arith.constant 1024 : index
%c256 = arith.constant 256 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x256xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1024x256xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x256xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<readonly:tensor<512x256xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) : !flow.dispatch.tensor<writeonly:tensor<1024x256xf32>>
%10 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x256xf32>> -> tensor<1024x256xf32>
%13 = tensor.empty() : tensor<1024x256xf32>
%15 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x512xf32>> -> tensor<1024x512xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
index aed1a32..41a2c4a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
@@ -2,14 +2,12 @@
// RUN: --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' \
// RUN: %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @i4_dequant_unit_matmul_f16 {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -28,11 +26,11 @@
func.func @i4_dequant_unit_matmul_f16() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x86x128xf16>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x86x128xf16>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x1x4096xf16>>
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%6 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>> -> tensor<4096x86x1xf16>
%7 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0], sizes = [4096, 86, 1], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x1xf16>> -> tensor<4096x86x1xf16>
@@ -114,12 +112,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 5, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 5, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @i4_dequant_matvec_f16_subgroup_64 {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb", {
@@ -147,11 +143,11 @@
%7 = arith.index_castui %2 : i32 to index
%8 = arith.index_castui %3 : i32 to index
%9 = arith.index_castui %4 : i32 to index
- %10 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
- %12 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
- %13 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128xf16>>
- %14 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
+ %10 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>>
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
+ %12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>>
+ %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<86x128xf16>>
+ %14 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<4096xf16>>
%15 = flow.dispatch.tensor.load %10, offsets = [0, 0, 0], sizes = [4096, 86, 128], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86x128xi4>> -> tensor<4096x86x128xi4>
%16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>> -> tensor<4096x86xf16>
%17 = flow.dispatch.tensor.load %12, offsets = [0, 0], sizes = [4096, 86], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x86xf16>> -> tensor<4096x86xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
index 03a12aa..f514357 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_reduction_subgroup.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=valhall1 --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s --check-prefix=NOSHUFFLE
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @subgroup_reduce {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -18,8 +16,8 @@
func.func @subgroup_reduce() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x512xf32>> -> tensor<2x512xf32>
%3 = tensor.empty() : tensor<2xf32>
%4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<2xf32>) -> tensor<2xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
index b2e8d5b..c799a0d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --iree-gpu-test-target=vp_android_baseline_2022@vulkan --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(iree-codegen-spirv-configuration-pipeline), iree-codegen-linalg-to-spirv-pipeline)))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @i4_dequant {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -18,10 +16,10 @@
builtin.module {
func.func @i4_dequant() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<131072x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<131072xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<131072x128xf32>>
%4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [131072, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<131072x128xi4>> -> tensor<131072x128xi4>
%5 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor<readonly:tensor<131072xf32>> -> tensor<131072xf32>
%6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [131072], strides = [1] : !flow.dispatch.tensor<readonly:tensor<131072xf32>> -> tensor<131072xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir
index 331832e..d32855d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/set_transform_strategy.mlir
@@ -6,19 +6,17 @@
// core, but there are no such wmma intrinsics. Fix it to support fp16-input.
// TODO: | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul() {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2052x2052xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2052, 2556], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2052x2556xf32>> -> tensor<2052x2556xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2556, 2052], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2556x2052xf32>> -> tensor<2556x2052xf32>
%5 = tensor.empty() : tensor<2052x2052xf32>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir
index 27dbd92..9bc5667 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute.mlir
@@ -10,12 +10,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[8, 16], [1, 1], [0, 0, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 3, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matmul {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -29,9 +27,9 @@
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
- %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xf32>{%M, %K}
- %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xf32>{%K, %N}
- %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<?x?xf32>{%M, %N}
+ %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xf32>{%M, %K}
+ %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xf32>{%K, %N}
+ %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<?x?xf32>{%M, %N}
%c4 = arith.constant 4 : index
%c1 = arith.constant 1 : index
%0 = memref.dim %arg0, %c1 : memref<?x?xf32>
@@ -80,12 +78,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 4, 32], [1, 1, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv_1d {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -97,9 +93,9 @@
func.func @conv_1d() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<3x6x1xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<3x8x1xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x1x1xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<3x6x1xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<3x8x1xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x1x1xf32>
%3 = gpu.block_id x
%4 = gpu.block_id y
%5 = gpu.block_id z
@@ -125,9 +121,9 @@
}
// CHECK-LABEL: func.func @conv_1d
-// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK-DAG: %[[RET:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-DAG: %[[ARG0SV1:.+]] = memref.subview %[[ARG0]]
// CHECK-DAG: %[[ARG1SV1:.+]] = memref.subview %[[ARG1]]
// CHECK-DAG: %[[RETSV1:.+]] = memref.subview %[[RET]]
@@ -157,12 +153,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 9, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 9, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv_2d {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -182,9 +176,9 @@
%ic = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
%fh = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index
%fw = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index
- %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?x?x?xf32>{%n, %ih, %iw, %ic}
- %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?x?x?xf32>{%fh, %fw, %ic, %oc}
- %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<?x?x?x?xf32>{%n, %oh, %ow, %oc}
+ %arg0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?x?x?xf32>{%n, %ih, %iw, %ic}
+ %arg1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?x?x?xf32>{%fh, %fw, %ic, %oc}
+ %arg2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<?x?x?x?xf32>{%n, %oh, %ow, %oc}
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%c1 = arith.constant 1 : index
@@ -239,9 +233,9 @@
// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 4)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 32)>
// CHECK: func.func @conv_2d
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0
// CHECK-DAG: %[[C1:.+]] = arith.constant 1
// CHECK-DAG: %[[C4:.+]] = arith.constant 4
@@ -272,12 +266,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1, 4, 32], [0, 0, 1, 1, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @conv_3d {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -289,9 +281,9 @@
func.func @conv_3d() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x7x7x7x2xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x8x8x8x3xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<2x2x2x3x2xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x7x7x7x2xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2x8x8x8x3xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<2x2x2x3x2xf32>
%3 = gpu.block_id x
%4 = gpu.block_id y
%5 = gpu.block_id z
@@ -342,12 +334,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 4, 32], [1, 1, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
module {
hal.executable private @pooling_nhwc_max {
@@ -359,9 +349,9 @@
builtin.module {
func.func @pooling_nhwc_max() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2x16x16x6xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<3x4xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<2x14x13x6xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2x16x16x6xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<3x4xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<2x14x13x6xf32>
%3 = gpu.block_id x
%4 = gpu.block_id y
%5 = affine.apply #map0()[%4]
@@ -385,9 +375,9 @@
// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 4)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 32)>
// CHECK: func.func @pooling_nhwc_max
-// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK-DAG: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK-DAG: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK-DAG: %[[RET0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: %[[SV1:.+]] = memref.subview %[[ARG0]]
// CHECK: %[[SV2:.+]] = memref.subview %[[RET0]]
// CHECK-DAG: %[[TIDX:.+]] = gpu.thread_id x
@@ -409,12 +399,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[32], [1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable @matvec {
@@ -428,9 +416,9 @@
%c250 = arith.constant 250 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<250x1024xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<250xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<250x1024xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<250xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_x]
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
index 8ce2c91..63c4d2e 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
@@ -2,12 +2,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 16], [1, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @static_scatter_update_slice {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -20,9 +18,9 @@
%c40 = arith.constant 40 : index
%c500 = arith.constant 500 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<40x500xi32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<40x1xi32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : memref<100x500xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<40x500xi32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<40x1xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : memref<100x500xi32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -50,9 +48,9 @@
}
// CHECK-LABEL: func.func @static_scatter_update_slice()
-// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
-// CHECK: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[ARG1:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
+// CHECK: %[[ARG2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// CHECK: scf.for
// CHECK: scf.for
// CHECK: %[[WG_UPDATE:.+]] = memref.subview %[[ARG0]]
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
index 42738b7..051cd3d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
@@ -2,10 +2,8 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 0, 16], [1, 0, 1]]>
#translation = #iree_codegen.translation_info<SPIRVBaseDistribute>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @static_3d_sort {
hal.executable.variant @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -16,7 +14,7 @@
builtin.module {
func.func @static_3d_sort() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<64x32x128xi32, #hal.descriptor_type<storage_buffer>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<64x32x128xi32, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %0, 64 : memref<64x32x128xi32, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -34,7 +32,7 @@
}
// CHECK-LABEL: func.func @static_3d_sort()
-// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
+// CHECK: %[[ARG0:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
// CHECK: %[[WG_OUTPUT:.+]] = memref.subview %[[ARG0]]
// CHECK: %[[TID_X:.+]] = gpu.thread_id x
// CHECK: %[[DIM_X:.+]] = gpu.block_dim x
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir
index daa7c7e..afc60d5 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_cooperative_matrix.mlir
@@ -8,13 +8,11 @@
// Single tile per workgroup means no subview ops for promotion.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32, 32], [16, 16, 16], [0, 0, 32]]>
#map = affine_map<()[s0] -> (s0 * 32)>
@@ -24,13 +22,13 @@
%c32 = arith.constant 32 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<32x32xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<32x32xf16>
memref.assume_alignment %0, 64 : memref<32x32xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x32xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x32xf16>
memref.assume_alignment %1, 64 : memref<32x32xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x32xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x32xf16>
memref.assume_alignment %2, 64 : memref<32x32xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<32x32xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<32x32xf16>
memref.assume_alignment %3, 64 : memref<32x32xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -56,8 +54,8 @@
// CHECK-LABEL: func.func @matmul_f16_32x32x32()
-// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0)
-// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1)
+// CHECK: %[[LHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
+// CHECK: %[[RHS:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
// CHECK-NOT: memref.alloc()
// CHECK-NOT: memref.copy
@@ -69,12 +67,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
#map = affine_map<()[s0] -> (s0 * 32)>
@@ -89,10 +85,10 @@
%c512 = arith.constant 512 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -183,12 +179,10 @@
// Cooperative matrix fusable elementwise ops do not need promote C.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
#map = affine_map<()[s0] -> (s0 * 32)>
@@ -203,10 +197,10 @@
%c512 = arith.constant 512 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -267,12 +261,10 @@
// No need to promote C if there is no fused element wise ops.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32, 32], [1, 16, 16, 16], [0, 0, 0, 32]]>
#map = affine_map<()[s0] -> (s0 * 32)>
@@ -286,9 +278,9 @@
%c512 = arith.constant 512 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<128x32x64xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x64x512xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<32x128x512xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -325,7 +317,7 @@
// PROMOTEC: %[[LHS_ALLOC:.+]] = memref.alloc() : memref<32x1x32xf16, #gpu.address_space<workgroup>>
// PROMOTEC-NOT: memref.alloc()
-// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
+// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
// PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN2]]
// PROMOTEC: linalg.fill
@@ -352,12 +344,10 @@
// No need to promote again with allocations from bufferization.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 128], [1, 32, 64], [0, 0, 0, 32], [1, 16, 16, 16]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -368,9 +358,9 @@
%c4096 = arith.constant 4096 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x4096x512xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1x512x4096xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<1x4096x4096xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x4096x512xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1x512x4096xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<1x4096x4096xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -429,13 +419,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -448,10 +436,10 @@
%c4096 = arith.constant 4096 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x64xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<4096xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x64xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<4096xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -485,8 +473,8 @@
// PROMOTEC-DAG: %[[RHS_ALLOC:.+]] = memref.alloc() : memref<32x128xf16, #gpu.address_space<workgroup>>
// PROMOTEC-NOT: memref.alloc()
-// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN3]]
// PROMOTEC: linalg.fill
@@ -520,13 +508,11 @@
// Transposed+broadcasted elementwise ops does not need promoting C matrix.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -539,10 +525,10 @@
%c4096 = arith.constant 4096 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x64xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<512xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x64xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<64x4096xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<512xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<512x4096xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -576,8 +562,8 @@
// PROMOTEC-DAG: %[[RHS_ALLOC:.+]] = memref.alloc() : memref<32x128xf16, #gpu.address_space<workgroup>>
// PROMOTEC-NOT: memref.alloc()
-// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2)
-// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3)
+// PROMOTEC: %[[SPAN2:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
+// PROMOTEC: %[[SPAN3:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3)
// PROMOTEC: %[[OUT_VIEW:.+]] = memref.subview %[[SPAN3]]
// PROMOTEC: linalg.fill
@@ -611,12 +597,10 @@
// Inlined large constant array needs promoting C matrix.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -633,9 +617,9 @@
%cst = arith.constant 0.000000e+00 : f16
%cst_0 = arith.constant dense<"0x69222B2E40A3002A45AC1AAB2E2E202DA21C212680264C2A102314A041A7D029CB28352E5BAAD3B02F299D9A142B8AA1D1285C28412B25AF9A24EE2BA22C242D53AD9E2948A9289FCF301D28012F08AD68A6DD20ECAC912465290B2E9420C5AA50A222A912AB9526B62ADA2039AD4D912C9FDD287B20B224D329BA2A4D2C41A76DAB7E30B027F62ED1A0F1273A2BAE9D0FA48029812992A65AA92A2C9C2EE9A744A4632C5FA8A9A4CF2D70A482A0F5A2DBA7B6304B9D22A52B1B9DA8E424722AB5ACD0248A2B8B29C82D782E402D1A99F0A60CA4DE2DD32815266F2A6B247FA6FE214E2853AA402390AB6925F1A339307F2664A23CACBE28BA2B3D286DB0BA2E"> : tensor<128xf16>
%0 = bufferization.to_memref %cst_0 : memref<128xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c96565312) : memref<128x2304xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c806357120) : memref<2304x262144xf16>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c134217728) : memref<128x262144xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c96565312) : memref<128x2304xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c806357120) : memref<2304x262144xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c134217728) : memref<128x262144xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir
index 36510ae..cc235c4 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_promote_matmul.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --mlir-print-local-scope --iree-gpu-test-target=pascal@vulkan --pass-pipeline='builtin.module(func.func(iree-spirv-tile-and-promote, cse))' %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[128, 128], [16, 4], [0, 0, 32]]>
#map = affine_map<()[s0] -> (s0 * 128)>
@@ -19,10 +17,10 @@
%c256 = arith.constant 256 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x128xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<128x1024xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x1024xf32>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<256x1024xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x128xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<128x1024xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x1024xf32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<256x1024xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -58,10 +56,10 @@
// CHECK-DAG: %[[MEM_A:.+]] = memref.alloc() : memref<128x32xf32, #gpu.address_space<workgroup>>
// CHECK-DAG: %[[MEM_B:.+]] = memref.alloc() : memref<32x128xf32, #gpu.address_space<workgroup>>
-// CHECK-DAG: %[[BUFFER_A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) {{.+}} : memref<256x128xf32>
-// CHECK-DAG: %[[BUFFER_B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) {{.+}} : memref<128x1024xf32>
-// CHECK-DAG: %[[BUFFER_C:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(3) {{.+}} : memref<256x1024xf32>
-// CHECK-DAG: %[[BUFFER_D:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(2) {{.+}} : memref<256x1024xf32>
+// CHECK-DAG: %[[BUFFER_A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) {{.+}} : memref<256x128xf32>
+// CHECK-DAG: %[[BUFFER_B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.+}} : memref<128x1024xf32>
+// CHECK-DAG: %[[BUFFER_C:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(3) {{.+}} : memref<256x1024xf32>
+// CHECK-DAG: %[[BUFFER_D:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2) {{.+}} : memref<256x1024xf32>
// CHECK: scf.for
// CHECK: scf.for
@@ -112,12 +110,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 64, 256], [1, 8, 8], [0, 0, 0, 16]]>
#map = affine_map<()[s0] -> (s0 * 64)>
@@ -130,9 +126,9 @@
%c1024 = arith.constant 1024 : index
%cst = arith.constant 0.111803398 : f32
%cst_0 = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x1024x80xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x80x1024xf16>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x1024x1024xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x1024x80xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x80x1024xf16>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x1024x1024xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -179,12 +175,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 512, 8], [1, 8, 4], [0, 0, 0, 16]]>
#map = affine_map<()[s0] -> (s0 * 512)>
@@ -196,9 +190,9 @@
%c40 = arith.constant 40 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x4096x4096xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x4096x40xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x4096x40xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x4096x4096xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x4096x40xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x4096x40xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index f04d688..aa1e45f 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
@@ -4,12 +4,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @fused_fill_batch_matmul {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -23,9 +21,9 @@
%cst = arith.constant 0.000000e+00 : f32
%c4 = arith.constant 4 : index
%c1024 = arith.constant 1024 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x1024x1024xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x1024xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x1024x1024xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4x1024x1024xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4x1024x1024xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4x1024x1024xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
index e0faab5..ec4976c 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
@@ -4,12 +4,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @nhwc_conv_static_shape_f32 {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -23,9 +21,9 @@
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x225x225x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x8x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x16xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -78,12 +76,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @nhwc_nhwc_depthwise_conv_static_shape_f32 {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -97,9 +93,9 @@
%c96 = arith.constant 96 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x113x113x96xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x56x56x96xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x113x113x96xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x96xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x56x56x96xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -148,13 +144,11 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @low_padded_conv {
@@ -176,10 +170,10 @@
%c0 = arith.constant 0 : index
%c112 = arith.constant 112 : index
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x224x224x3xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x224x224x3xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%4 = tensor.empty() : tensor<1x112x112x32xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -266,13 +260,11 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 4, 32], [0, 1, 2, 4], [0, 0, 0, 0, 1, 1], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @low_high_padded_nhwc_depthwise_conv {
@@ -294,10 +286,10 @@
%c0 = arith.constant 0 : index
%c112 = arith.constant 112 : index
%c32 = arith.constant 32 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x32xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1x112x112x32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x32xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32xf32>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x112x112x32xf32>>
%4 = tensor.empty() : tensor<1x112x112x32xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -388,12 +380,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 16, 8, 8], [0, 8, 1, 4], [0, 0, 0, 0, 4, 1, 1], [0, 0, 1, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @nchw_conv_static_shape_f32 {
@@ -407,9 +397,9 @@
%c1280 = arith.constant 1280 : index
%c8 = arith.constant 8 : index
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x1280x10x10xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280x3x3xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x1280x8x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x1280x10x10xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<1280x1280x3x3xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<2x1280x8x8xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -462,13 +452,11 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 1, 64, 64], [1, 1, 8, 8], [0, 0, 0, 0, 1, 1, 8], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @nhwc_conv_static_shape_f16_batch2 {
@@ -483,10 +471,10 @@
%c320 = arith.constant 320 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x66x66x320xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x320x320xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x64x320xf16>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x64x64x320xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x66x66x320xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3x3x320x320xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2x64x64x320xf16>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x64x64x320xf16>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index e194b30..5487820 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
@@ -3,12 +3,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matmul_static_shape_f16 {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -21,9 +19,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f16
%c4096 = arith.constant 4096 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf16>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf16>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf16>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf16>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf16>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf16>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -65,12 +63,10 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @matmul_static_shape_f32 {
hal.executable.variant @vulkan target(<"vulkan-spirv", "vulkan-spirv-fb">) {
@@ -83,9 +79,9 @@
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%c4096 = arith.constant 4096 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<4096x4096xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<4096x4096xf32>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir
index 9c43800..fb3e57c 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_pooling.mlir
@@ -4,11 +4,9 @@
#config = #iree_codegen.lowering_config<tile_sizes = [[0, 2, 2, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1], [0, 1, 0, 0]]>
#translation = #iree_codegen.translation_info<SPIRVBaseVectorize>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @pooling_nhwc_sum_f32 {
@@ -28,8 +26,8 @@
%c8 = arith.constant 8 : index
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x24x24x8xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x8xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x24x24x8xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<1x2x2x8xf32>>
%2 = tensor.empty() : tensor<12x12xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index a27be5d..cc56261 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
@@ -2,14 +2,12 @@
// RUN: --pass-pipeline='builtin.module(func.func(iree-spirv-tile-to-cooperative-ops, iree-codegen-generic-vectorization, iree-spirv-vectorize-to-cooperative-ops, iree-codegen-optimize-tensor-insert-extract-slices, canonicalize, cse))' \
// RUN: %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32], [16, 16], [0, 0, 32], [16, 16, 16]]>
#translation = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize workgroup_size = [32, 1, 1]>
@@ -23,11 +21,11 @@
%2 = gpu.thread_id z
%alloc = memref.alloc() : memref<32x32xf16, 3>
%alloc_0 = memref.alloc() : memref<32x32xf16, 3>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x1024xf16>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024x128xf16>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<256x128xf16>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<256x128xf16>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : memref<256x128xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x1024xf16>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024x128xf16>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x128xf16>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<256x128xf16>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(4) alignment(64) offset(%c0) : memref<256x128xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%8 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y]
@@ -134,13 +132,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[1, 32, 32], [1, 16, 16], [0, 0, 0, 32], [1, 16, 16, 16]]>
#translation = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize workgroup_size=[32, 1, 1]>
@@ -155,13 +151,13 @@
%2 = gpu.thread_id z
%alloc = memref.alloc() : memref<1x32x32xf16, 3>
%alloc_0 = memref.alloc() : memref<1x32x32xf16, 3>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<16x128x512xf16>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16x128x512xf16>
memref.assume_alignment %3, 64 : memref<16x128x512xf16>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<16x512x256xf16>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16x512x256xf16>
memref.assume_alignment %4, 64 : memref<16x512x256xf16>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<16x128x256xf16>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<16x128x256xf16>
memref.assume_alignment %5, 64 : memref<16x128x256xf16>
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : memref<16x128x256xf16>
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : memref<16x128x256xf16>
memref.assume_alignment %6, 64 : memref<16x128x256xf16>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -272,12 +268,10 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#config = #iree_codegen.lowering_config<tile_sizes = [[32, 32], [16, 16], [0, 0, 32], [16, 16, 16]]>
#translation = #iree_codegen.translation_info<SPIRVCooperativeMatrixVectorize workgroup_size=[32, 1, 1]>
@@ -292,9 +286,9 @@
%2 = gpu.thread_id z
%alloc = memref.alloc() : memref<32x32xi8, 3>
%alloc_0 = memref.alloc() : memref<32x32xi8, 3>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<256x1024xi8>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<1024x128xi8>
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(4) alignment(64) offset(%c0) : memref<256x128xi32>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<256x1024xi8>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<1024x128xi8>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<256x128xi32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%8 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%workgroup_id_y]
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir
index 07a831e..9c2477a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/trim_executable_target_env.mlir
@@ -2,14 +2,18 @@
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, GroupNonUniformArithmetic],
- [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>,
- api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>}>
-
+ [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>,
+ api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>
+}>
// CHECK-DAG: #[[$TARGET0:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
// CHECK-DAG: #[[$TARGET1:.+]] = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Shader, GroupNonUniformArithmetic], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
hal.executable private @predict_dispatch_0 {
// CHECK-LABEL: hal.executable.variant public @vulkan_spirv_fb0
@@ -22,10 +26,12 @@
hal.return %c2, %c1, %c1 : index, index, index
}
// CHECK-NOT: spirv.target_env
- builtin.module attributes {spirv.target_env = #spirv.target_env<
+ builtin.module attributes {
+ spirv.target_env = #spirv.target_env<
#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8],
[SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>,
- api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>} {
+ api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>
+ } {
spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
spirv.func @predict_dispatch_0_vecmat_128x784_f32() "None" {
spirv.Return
@@ -48,10 +54,12 @@
hal.return %c10, %c1, %c1 : index, index, index
}
// CHECK-NOT: spirv.target_env
- builtin.module attributes {spirv.target_env = #spirv.target_env<
+ builtin.module attributes {
+ spirv.target_env = #spirv.target_env<
#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8],
[SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class]>,
- api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>} {
+ api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<>>
+ } {
spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, GroupNonUniformArithmetic], [SPV_KHR_storage_buffer_storage_class]> {
spirv.func @predict_dispatch_1_vecmat_10x128_f32() "None" {
spirv.Return
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir
index 0a2fb48..93a861d 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/vectorize_load_store.mlir
@@ -49,23 +49,21 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy()
-// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x1024xvector<4xf32>>
-// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf32>>
+// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x1024xvector<4xf32>>
+// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf32>>
// CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>>
// CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>>
func.func @resource_copy() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf32>
%v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf32>, vector<4xf32>
vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x4096xf32>
return
@@ -73,24 +71,22 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy_with_offset()
-// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) offset(%{{.*}}) : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>>
-// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf32>>
+// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) offset(%{{.*}}) : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>>
+// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf32>>
// CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}, %{{.*}}] : memref<2048x4096x1024xvector<4xf32>, strided<[4194304, 1024, 1], offset: ?>>
// CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf32>>
func.func @resource_copy_with_offset() {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%offset = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%offset) : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%offset) : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf32>
%v = vector.transfer_read %0[%c0, %c0, %c0], %cst : memref<2048x4096x4096xf32, strided<[16777216, 4096, 1], offset: ?>>, vector<4xf32>
vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x4096xf32>
return
@@ -98,23 +94,21 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy_f16
-// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x1024xvector<4xf16>>
-// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x1024xvector<4xf16>>
+// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x1024xvector<4xf16>>
+// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x1024xvector<4xf16>>
// CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf16>>
// CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x1024xvector<4xf16>>
func.func @resource_copy_f16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf16>
%v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf16>, vector<4xf16>
vector.transfer_write %v, %1[%c0, %c0] : vector<4xf16>, memref<4096x4096xf16>
return
@@ -122,23 +116,21 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy_8xf16
-// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x512xvector<4xf32>>
-// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x512xvector<4xf32>>
+// CHECK: %[[A:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x512xvector<4xf32>>
+// CHECK: %[[B:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x512xvector<4xf32>>
// CHECK: %[[V:.+]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<4096x512xvector<4xf32>>
// CHECK: memref.store %[[V]], %[[B]][%{{.*}}, %{{.*}}] : memref<4096x512xvector<4xf32>>
func.func @resource_copy_8xf16() {
%cst = arith.constant 0.000000e+00 : f16
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x4096xf16>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x4096xf16>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x4096xf16>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x4096xf16>
%v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x4096xf16>, vector<8xf16>
vector.transfer_write %v, %1[%c0, %c0] : vector<8xf16>, memref<4096x4096xf16>
return
@@ -146,11 +138,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy_dynamic_shape()
@@ -162,10 +152,10 @@
%dim0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
%dim1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
- // CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<?x8x?x32xvector<4xf32>>{%[[DIM0]], %[[DIM1]]}
- // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<?x8x?x32xvector<4xf32>>{%[[DIM0]], %[[DIM1]]}
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x8x?x128xf32>{%dim0, %dim1}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x8x?x128xf32>{%dim0, %dim1}
+ // CHECK: %[[INPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<?x8x?x32xvector<4xf32>>{%[[DIM0]], %[[DIM1]]}
+ // CHECK: %[[OUTPUT:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<?x8x?x32xvector<4xf32>>{%[[DIM0]], %[[DIM1]]}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x8x?x128xf32>{%dim0, %dim1}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x8x?x128xf32>{%dim0, %dim1}
// CHECK: %[[VAL:.+]] = memref.load %[[INPUT]]
// CHECK: memref.store %[[VAL]], %[[OUTPUT]]
@@ -177,11 +167,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @resource_copy_dynamic_last_dim()
@@ -189,10 +177,10 @@
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%dim = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
- // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<4096x?xf32>
- // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(1) : memref<4096x?xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4096x?xf32>{%dim}
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4096x?xf32>{%dim}
+ // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<4096x?xf32>
+ // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(1) : memref<4096x?xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4096x?xf32>{%dim}
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4096x?xf32>{%dim}
%v = vector.transfer_read %0[%c0, %c0], %cst : memref<4096x?xf32>, vector<4xf32>
vector.transfer_write %v, %1[%c0, %c0] : vector<4xf32>, memref<4096x?xf32>
return
@@ -200,11 +188,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @dont_vectorize_odd_vector_size
@@ -213,10 +199,10 @@
%c0 = arith.constant 0 : index
// CHECK: hal.interface.binding.subspan
// CHECK-SAME: memref<4x3xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4x3xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4x3xf32>
// CHECK: hal.interface.binding.subspan
// CHECK-SAME: memref<4x3xf32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<4x3xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<4x3xf32>
%v = vector.transfer_read %0[%c0, %c0], %cst : memref<4x3xf32>, vector<3xf32>
vector.transfer_write %v, %1[%c0, %c0] : vector<3xf32>, memref<4x3xf32>
return
@@ -224,11 +210,9 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @scalarize_vector_transfer_op
@@ -236,8 +220,8 @@
%c0 = arith.constant 0: index
%c3 = arith.constant 3: index
%f0 = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<20xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<20xf32>
// CHECK-DAG: %[[INDEX0:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[INDEX1:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[INDEX2:.+]] = arith.constant 5 : index
@@ -289,17 +273,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @scalarize_non_minor_identity_transfer_write
// CHECK-SAME: (%[[VALUE:.+]]: vector<4xf32>, %[[I1:.+]]: index, %[[I2:.+]]: index)
func.func @scalarize_non_minor_identity_transfer_write(%value: vector<4xf32>, %i1: index, %i2: index) {
%c0 = arith.constant 0: index
- %buffer = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<1x130x130x64xf32>
+ %buffer = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<1x130x130x64xf32>
vector.transfer_write %value, %buffer[%c0, %i1, %i2, %c0] {in_bounds = [true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d2)>} : vector<4xf32>, memref<1x130x130x64xf32>
return
}
@@ -346,16 +328,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @scalarize_indivisible_vector_transfer_read_op
func.func @scalarize_indivisible_vector_transfer_read_op(%i: index) -> vector<4xf32> {
%f0 = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10xf32>
%1 = vector.transfer_read %0[%i], %f0 : memref<10xf32>, vector<4xf32>
return %1: vector<4xf32>
}
@@ -366,16 +346,14 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @scalarize_indivisible_vector_transfer_write_op
func.func @scalarize_indivisible_vector_transfer_write_op(%value: vector<4xf32>, %i: index) {
%f0 = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10xf32>
vector.transfer_write %value, %0[%i] : vector<4xf32>, memref<10xf32>
return
}
@@ -434,17 +412,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @scalarize_vector_load_op
// CHECK-SAME: (%[[ARG0:.+]]: index)
func.func @scalarize_vector_load_op(%i: index) -> vector<4xi32> {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<10x10xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<10x10xi32>
%1 = vector.load %0[%c0, %i] : memref<10x10xi32>, vector<4xi32>
return %1: vector<4xi32>
}
@@ -469,35 +445,31 @@
// Test that the memref is not vectorized if the element type is a complex type.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @complex_memref
func.func @complex_memref(%x: index, %y: index) -> complex<f32> {
- // CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<8x32xcomplex<f32>>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<8x32xcomplex<f32>>
+ // CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<8x32xcomplex<f32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<8x32xcomplex<f32>>
%1 = memref.load %0[%x, %y] : memref<8x32xcomplex<f32>>
return %1: complex<f32>
}
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: func.func @vectorize_mma_load_store_non_identity_memref
// CHECK-SAME: (%[[I0:.+]]: index, %[[I1:.+]]: index)
func.func @vectorize_mma_load_store_non_identity_memref(%i0: index, %i1: index) {
%c0 = arith.constant 0 : index
- %span0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>>
- %span1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>>
+ %span0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>>
+ %span1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>>
%val = gpu.subgroup_mma_load_matrix %span0[%i0, %i1] {leadDimension = 1280 : index} : memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>> -> !gpu.mma_matrix<16x16xf16, "COp">
gpu.subgroup_mma_store_matrix %val, %span1[%i0, %i1] {leadDimension = 1280 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x1280xf16, strided<[1280, 1], offset: 11840>, #hal.descriptor_type<storage_buffer>>
return
@@ -512,22 +484,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_read_i4_memref_vector8(%x: index) -> vector<8xi4> {
%c0_i4 = arith.constant 0 : i4
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi4>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi4>
%1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<8xi4>
return %1: vector<8xi4>
}
// CHECK-LABEL: func.func @transfer_read_i4_memref_vector8
// CHECK-SAME: (%[[ARG:.+]]: index)
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<256xvector<1xi32>>
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<256xvector<1xi32>>
// CHECK: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 8)>()[%[[ARG]]]
// CHECK: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<256xvector<1xi32>>
// CHECK: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi32> to vector<8xi4>
@@ -537,14 +507,14 @@
// func.func @transfer_read_i4_memref_vector4(%x: index) -> vector<4xi4> {
// %c0_i4 = arith.constant 0 : i4
-// %0 = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<2048xi4>
+// %0 = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<2048xi4>
// %1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<4xi4>
// return %1: vector<4xi4>
// }
// XXXXX-LABEL: func.func @transfer_read_i4_memref_vector4
// XXXXX-SAME: (%[[ARG:.+]]: index)
-// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<512xvector<2xi8>>
+// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<512xvector<2xi8>>
// XXXXX: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 4)>()[%[[ARG]]]
// XXXXX: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<512xvector<2xi8>>
// XXXXX: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<2xi8> to vector<4xi4>
@@ -552,22 +522,20 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_read_i4_memref_vector2(%x: index) -> vector<2xi4> {
%c0_i4 = arith.constant 0 : i4
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi4>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi4>
%1 = vector.transfer_read %0[%x], %c0_i4 {in_bounds = [true]} : memref<2048xi4>, vector<2xi4>
return %1: vector<2xi4>
}
// XXXXX-LABEL: func.func @transfer_read_i4_memref_vector2
// XXXXX-SAME: (%[[ARG:.+]]: index)
-// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1024xvector<1xi8>>
+// XXXXX: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1024xvector<1xi8>>
// XXXXX: %[[INDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 2)>()[%[[ARG]]]
// XXXXX: %[[LOAD:.+]] = memref.load %[[SUBSPAN]][%[[INDEX]]] : memref<1024xvector<1xi8>>
// XXXXX: %[[CAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<2xi4>
@@ -575,34 +543,30 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_read_i3_memref_vector8(%x: index) -> vector<8xi3> {
%c0_i3 = arith.constant 0 : i3
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi3>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi3>
%1 = vector.transfer_read %0[%x], %c0_i3 {in_bounds = [true]} : memref<2048xi3>, vector<8xi3>
return %1: vector<8xi3>
}
// CHECK-LABEL: func.func @transfer_read_i3_memref_vector8
-// CHECK: hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<2048xi3>
+// CHECK: hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<2048xi3>
// CHECK-COUNT-8: memref.load {{.+}} : memref<2048xi3>
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_read_vector2_vector8(%x: index) -> (vector<2xi32>, vector<8xi32>) {
%c0 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi32>
%1 = vector.transfer_read %0[%x], %c0 {in_bounds = [true]} : memref<2048xi32>, vector<2xi32>
%2 = vector.transfer_read %0[%x], %c0 {in_bounds = [true]} : memref<2048xi32>, vector<8xi32>
return %1, %2: vector<2xi32>, vector<8xi32>
@@ -628,15 +592,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @transfer_write_vector2_vector8(%x: index, %val0: vector<2xi32>, %val1: vector<8xi32>) {
%c0 = arith.constant 0 : i32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<2048xi32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<2048xi32>
vector.transfer_write %val0, %0[%x] : vector<2xi32>, memref<2048xi32>
vector.transfer_write %val1, %0[%x] : vector<8xi32>, memref<2048xi32>
return
@@ -644,7 +606,7 @@
// CHECK-LABEL: func @transfer_write_vector2_vector8
// CHECK-SAME: (%[[INDEX:.+]]: index, %[[VAL0:.+]]: vector<2xi32>, %[[VAL1:.+]]: vector<8xi32>)
-// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) set(0) binding(0) : memref<1024xvector<2xi32>>
+// CHECK: %[[SUBSPAN:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0) : memref<1024xvector<2xi32>>
// CHECK: %[[OFFSET0:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 2)>()[%[[INDEX]]]
// CHECK: memref.store %[[VAL0]], %[[SUBSPAN]][%[[OFFSET0]]]
@@ -663,19 +625,17 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @scalarize_masked_vector_transfer_op(%arg: vector<3xf32>, %mask: vector<3xi1>) -> (vector<3xf32>) {
%c0 = arith.constant 0: index
%c3 = arith.constant 3: index
%f0 = arith.constant 0.0 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<20xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<20xf32>
%3 = vector.transfer_read %0[%c3], %f0, %mask : memref<20xf32>, vector<3xf32>
vector.transfer_write %arg, %2[%c3], %mask : vector<3xf32>, memref<20xf32>
return %3: vector<3xf32>
@@ -722,17 +682,15 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @extract_vector_transfer_read_mask_bits(%arg: vector<3xf32>, %index: index) -> (vector<3xf32>) {
%c3 = arith.constant 3: index
%f0 = arith.constant 0.0 : f32
%mask = vector.create_mask %index : vector<3xi1>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<20xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<20xf32>
%1 = vector.transfer_read %0[%c3], %f0, %mask : memref<20xf32>, vector<3xf32>
return %1: vector<3xf32>
}
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
index 337d4ed..3507752 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/Transforms.cpp
@@ -551,7 +551,7 @@
Value newSubspanOp = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(),
- subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(),
+ subspanOp.getBinding(), subspanOp.getByteOffset(),
subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(),
subspanOp.getDescriptorFlagsAttr());
@@ -623,7 +623,7 @@
rewriter.setInsertionPointAfter(subspanOp);
newSubspanOp = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(),
- subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(),
+ subspanOp.getBinding(), subspanOp.getByteOffset(),
subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(),
subspanOp.getDescriptorFlagsAttr());
}
@@ -759,7 +759,7 @@
rewriter.setInsertionPointAfter(subspanOp);
newSubspanOp = rewriter.create<IREE::HAL::InterfaceBindingSubspanOp>(
subspanOp.getLoc(), newSubspanType, subspanOp.getLayout(),
- subspanOp.getSet(), subspanOp.getBinding(), subspanOp.getByteOffset(),
+ subspanOp.getBinding(), subspanOp.getByteOffset(),
subspanOp.getDynamicDims(), subspanOp.getAlignmentAttr(),
subspanOp.getDescriptorFlagsAttr());
}
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir
index 2baedf6..e760eda 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/link_executables.mlir
@@ -1,11 +1,9 @@
// RUN: iree-opt --split-input-file --iree-vmvx-link-executables %s | FileCheck %s
#vmvx_target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_0 {
@@ -87,9 +85,9 @@
%dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@vmvx::@dispatch_0) : index
%dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@vmvx::@dispatch_1) : index
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@vmvx::@dispatch_2) : index
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
return
}
util.initializer {
@@ -104,9 +102,9 @@
%dispatch_0_ordinal = hal.executable.export.ordinal target(@dispatch_0::@vmvx::@dispatch_0) : index
%dispatch_1_ordinal = hal.executable.export.ordinal target(@dispatch_1::@vmvx::@dispatch_1) : index
%dispatch_2_ordinal = hal.executable.export.ordinal target(@dispatch_2::@vmvx::@dispatch_2) : index
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_0_exe : !hal.executable)[%dispatch_0_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_1_exe : !hal.executable)[%dispatch_1_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%dispatch_2_exe : !hal.executable)[%dispatch_2_ordinal] workgroups([%c1, %c1, %c1]) bindings([(%c0 : index)[%c0, %c0]]) flags(None)
util.return
}
@@ -158,8 +156,8 @@
// CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_1)
// CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_2)
// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
//
// CHECK: util.initializer
// CHECK-DAG: %[[DISPATCH_0_EXE:.+]] = hal.executable.lookup device(%{{.+}}) executable(@link_executables_linked_vmvx) : !hal.executable
@@ -169,17 +167,15 @@
// CHECK-DAG: %[[DISPATCH_1_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_1)
// CHECK-DAG: %[[DISPATCH_2_ORDINAL:.+]] = hal.executable.export.ordinal target(@link_executables_linked_vmvx::@vmvx_bytecode_fb::@dispatch_2)
// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_0_EXE]] : !hal.executable)[%[[DISPATCH_0_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
-// CHECK-NEXT: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_1_EXE]] : !hal.executable)[%[[DISPATCH_1_ORDINAL]]] workgroups([%c1, %c1, %c1])
+// CHECK: hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%[[DISPATCH_2_EXE]] : !hal.executable)[%[[DISPATCH_2_ORDINAL]]] workgroups([%c1, %c1, %c1])
// -----
#vmvx_target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @dispatch_0 {
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir
index 38cc29a..9f18092 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/pipeline.mlir
@@ -2,11 +2,9 @@
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -23,15 +21,15 @@
%0:2 = iree_codegen.query_tile_sizes tensor<16x16xi8, #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%1 = affine.apply #map3()[%0#0]
%2 = affine.apply #map3()[%0#1]
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%1, %2, %0#0, %0#1}
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%1, %2, %0#0, %0#1}
%4:2 = iree_codegen.query_tile_sizes tensor<16x16xi8, #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%5 = affine.apply #map3()[%4#0]
%6 = affine.apply #map3()[%4#1]
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c256) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%5, %6, %4#0, %4#1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c256) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%5, %6, %4#0, %4#1}
%8:2 = iree_codegen.query_tile_sizes tensor<16x16xi32, #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2]>> -> index, index
%9 = affine.apply #map3()[%8#0]
%10 = affine.apply #map3()[%8#1]
- %11 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c512) : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xi32>>{%9, %10, %8#0, %8#1}
+ %11 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c512) : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xi32>>{%9, %10, %8#0, %8#1}
%12 = flow.dispatch.tensor.load %3, offsets = [0, 0, 0, 0], sizes = [%1, %2, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%1, %2, %0#0, %0#1} -> tensor<?x?x?x?xi8>
%13 = flow.dispatch.tensor.load %7, offsets = [0, 0, 0, 0], sizes = [%5, %6, %4#0, %4#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi8>>{%5, %6, %4#0, %4#1} -> tensor<?x?x?x?xi8>
%14 = flow.dispatch.tensor.load %11, offsets = [0, 0, 0, 0], sizes = [%9, %10, %8#0, %8#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?x?x?xi32>>{%9, %10, %8#0, %8#1} -> tensor<?x?x?x?xi32>
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
index 5a2f640..34bcdaa 100644
--- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir
@@ -1,18 +1,16 @@
// RUN: iree-opt -pass-pipeline='builtin.module(iree-vmvx-select-lowering-strategy)' -split-input-file %s | FileCheck %s
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @matmul_static() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
%cst = arith.constant 0.000000e+00 : f32
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readonly:tensor<384x512xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readonly:tensor<512x128xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) : !flow.dispatch.tensor<writeonly:tensor<384x128xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<384x512xf32>> -> tensor<384x512xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x128xf32>> -> tensor<512x128xf32>
%5 = tensor.empty() : tensor<384x128xf32>
@@ -32,11 +30,9 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 6, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @copy_op_dynamic() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
@@ -46,8 +42,8 @@
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<?x?xi32>{%0, %1}
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<?x?xi32>{%2, %3}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<?x?xi32>{%0, %1}
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<?x?xi32>{%2, %3}
%subview = memref.subview %7[%4, %5] [%0, %1] [1, 1] : memref<?x?xi32> to memref<?x?xi32, strided<[?, 1], offset: ?>>
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%6 : memref<?x?xi32>) outs(%subview : memref<?x?xi32, strided<[?, 1], offset: ?>>) {
^bb0(%in: i32, %out: i32):
@@ -66,19 +62,17 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @static_1d_fft_stage2() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%cst = arith.constant dense<[1.000000e+00, 6.12323426E-17]> : tensor<2xf32>
%cst_0 = arith.constant dense<[-0.000000e+00, -1.000000e+00]> : tensor<2xf32>
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : !flow.dispatch.tensor<readwrite:tensor<32xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%3 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readwrite:tensor<32xf32>> -> tensor<32xf32>
%4:2 = iree_linalg_ext.fft ins(%c2, %cst, %cst_0 : index, tensor<2xf32>, tensor<2xf32>) outs(%2, %3 : tensor<32xf32>, tensor<32xf32>) : tensor<32xf32>, tensor<32xf32>
@@ -97,13 +91,11 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>,
- #hal.descriptor_set.binding<3, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -117,11 +109,11 @@
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = arith.index_castui %0 : i32 to index
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3360x32xi8>>
- %3 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32xi32>>
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c107520) : !flow.dispatch.tensor<readonly:tensor<32xi32>>
- %5 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x3360xi8>>{%1}
- %6 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x32xi8>>{%1}
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<3360x32xi8>>
+ %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<32xi32>>
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c107520) : !flow.dispatch.tensor<readonly:tensor<32xi32>>
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x3360xi8>>{%1}
+ %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x32xi8>>{%1}
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%1, 3360], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x3360xi8>>{%1} -> tensor<?x3360xi8>
%8 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [3360, 32], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<3360x32xi8>> -> tensor<3360x32xi8>
%9 = flow.dispatch.tensor.load %3, offsets = [0], sizes = [32], strides = [1] : !flow.dispatch.tensor<readonly:tensor<32xi32>> -> tensor<32xi32>
@@ -158,11 +150,9 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
%c131072 = arith.constant 131072 : index
@@ -175,8 +165,8 @@
%5 = arith.index_castui %1 : i32 to index
%6 = arith.index_castui %2 : i32 to index
%7 = arith.index_castui %3 : i32 to index
- %8 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
- %9 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
+ %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5}
+ %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%6, %7}
%10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x32x16xi32>>{%4, %5} -> tensor<?x?x32x16xi32>
%11 = tensor.empty(%6, %7) : tensor<?x?xi32>
%unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
@@ -194,11 +184,9 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = true}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
#map = affine_map<()[s0] -> (1024 ceildiv s0)>
#map1 = affine_map<()[s0] -> (2048 ceildiv s0)>
@@ -206,11 +194,11 @@
func.func @elem_pack_ukernels() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x2048xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1024x2048xf32>>
%1:2 = iree_codegen.query_tile_sizes tensor<1024x2048xf32, #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<1024x2048xf32>>> -> index, index
%2 = affine.apply #map()[%1#0]
%3 = affine.apply #map1()[%1#1]
- %4 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%2, %3, %1#0, %1#1}
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x?x?x?xf32>>{%2, %3, %1#0, %1#1}
%5 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1024, 2048], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<1024x2048xf32>> -> tensor<1024x2048xf32>
%6 = tensor.empty() : tensor<1024x2048xf32>
%7 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<1024x2048xf32>) outs(%6 : tensor<1024x2048xf32>) {
@@ -240,10 +228,8 @@
// -----
#executable_target_vmvx_bytecode_fb = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "none"}>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @copy_cst() attributes {hal.executable.target = #executable_target_vmvx_bytecode_fb} {
%cst = arith.constant dense<4.200000e-01> : tensor<5x19x8x4xf32>
@@ -255,7 +241,7 @@
%4 = arith.shli %3, %c32_i64 : i64
%5 = arith.ori %2, %4 : i64
%6 = arith.index_castui %5 : i64 to index
- %7 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%6) : !flow.dispatch.tensor<writeonly:tensor<5x19x8x4xf32>>
+ %7 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%6) : !flow.dispatch.tensor<writeonly:tensor<5x19x8x4xf32>>
flow.dispatch.tensor.store %cst, %7, offsets = [0, 0, 0, 0], sizes = [5, 19, 8, 4], strides = [1, 1, 1, 1] : tensor<5x19x8x4xf32> -> !flow.dispatch.tensor<writeonly:tensor<5x19x8x4xf32>>
return
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp
index 41acaf2..751f784 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/OutlineDispatchExterns.cpp
@@ -110,9 +110,6 @@
dispatchExternOp.getSubgroupSizeAttr(),
dispatchExternOp.getWorkgroupLocalMemoryAttr());
exportOp->setDialectAttrs(dispatchExternOp->getDialectAttrs());
- if (auto bindingsAttr = dispatchExternOp.getBindingsAttr()) {
- exportOp->setAttr("hal.interface.bindings", bindingsAttr);
- }
if (!dispatchExternOp.getWorkgroupCount().empty()) {
IRMapping mapper;
dispatchExternOp.getWorkgroupCount().cloneInto(
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
index 8ce0a36..3d25684 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/deduplicate_executables.mlir
@@ -386,11 +386,9 @@
hal.return %selected : i1
}
hal.executable.export public @dispatch ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) {
^bb0(%device: !hal.device, %workload: index):
hal.return %workload, %workload, %workload : index, index, index
@@ -405,11 +403,9 @@
hal.return %selected : i1
}
hal.executable.export public @dispatch ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) {
^bb0(%device: !hal.device, %workload: index):
hal.return %workload, %workload, %workload : index, index, index
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
index 4e73fe4..d4ff9aa 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/outline_dispatch_externs.mlir
@@ -4,8 +4,7 @@
// CHECK-NEXT: hal.executable.variant public @a target(<"llvm-cpu", "a">)
// CHECK-SAME: objects([#hal.executable.object<{path = "a.o"}>])
// CHECK-NEXT: hal.executable.export public @main ordinal(100)
-// CHECK-SAME: layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
-// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>]
+// CHECK-SAME: layout(#hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer>]>)
// CHECK-NEXT: ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
// CHECK-NEXT: %ok, %value = hal.device.query<%arg0 : !hal.device> key("some" :: "value") : i1, i32
// CHECK-NEXT: %0 = arith.index_cast %value : i32 to index
@@ -16,8 +15,7 @@
// CHECK-NEXT: %ok, %value = hal.device.query<%arg0 : !hal.device> key("some" :: "feature") : i1, i32
// CHECK-NEXT: hal.return %ok : i1
// CHECK: hal.executable.export public @main ordinal(200)
-// CHECK-SAME: layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>)
-// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>]
+// CHECK-SAME: layout(#hal.pipeline.layout<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer>]>)
// CHECK-NEXT: ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
// Demonstrates the full functionality of an extern dispatch op.
@@ -41,17 +39,10 @@
hal.return %x_capture, %y_capture, %z : index, index, index
}
// Must match the external definition.
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>)
- // Optional, automatically inferred if omitted.
- bindings([
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>
- ])
// Can have object references for multiple targets or configurations.
objects({
#hal.executable.target<"llvm-cpu", "a"> ordinal(100) = [#hal.executable.object<{path = "a.o"}>],
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
index 39b5ec8..14790bf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.cpp
@@ -19,20 +19,15 @@
void PipelineLayout::print(llvm::raw_ostream &os) const {
os << "PipelineLayout:\n";
- os << " push constants: " << pushConstantCount << "\n";
- os << " sets:\n";
- for (auto &setLayout : setLayouts) {
- os << " set[" << setLayout.ordinal
- << "]: " << stringifyDescriptorSetLayoutFlags(setLayout.flags) << "\n";
- for (auto &binding : setLayout.bindings) {
- os << " binding[" << binding.ordinal
- << "]: " << stringifyDescriptorType(binding.type) << "\n";
- }
+ os << " constants: " << constantCount << "\n";
+ os << " bindings:\n";
+ for (auto &binding : bindings) {
+ os << " binding[" << binding.ordinal
+ << "]: " << stringifyDescriptorType(binding.type) << "\n";
}
os << " resource map:\n";
- for (auto setBinding : llvm::enumerate(resourceMap)) {
- os << " resource[" << setBinding.index() << "]: set "
- << setBinding.value().first << " binding " << setBinding.value().second
+ for (auto ordinal : llvm::enumerate(resourceMap)) {
+ os << " resource[" << ordinal.index() << "]: binding " << ordinal.value()
<< "\n";
}
}
@@ -41,33 +36,18 @@
static PipelineLayout
assumeExportLayout(IREE::HAL::PipelineLayoutAttr layoutAttr) {
PipelineLayout pipelineLayout;
- pipelineLayout.pushConstantCount = layoutAttr.getPushConstants();
+ pipelineLayout.constantCount = layoutAttr.getConstants();
- auto setLayoutAttrs = layoutAttr.getSetLayouts();
- int64_t bindingCount = 0;
- for (auto setLayoutAttr : setLayoutAttrs) {
- bindingCount += setLayoutAttr.getBindings().size();
- }
-
- pipelineLayout.setLayouts.resize(setLayoutAttrs.size());
+ size_t bindingCount = layoutAttr.getBindings().size();
+ pipelineLayout.bindings.resize(bindingCount);
pipelineLayout.resourceMap.resize(bindingCount);
- for (auto setLayoutAttr : setLayoutAttrs) {
- DescriptorSetLayout setLayout;
- setLayout.ordinal = setLayoutAttr.getOrdinal();
- setLayout.flags = setLayoutAttr.getFlags().value_or(
- IREE::HAL::DescriptorSetLayoutFlags::None);
- auto bindingAttrs = setLayoutAttr.getBindings();
- setLayout.bindings.resize(bindingAttrs.size());
- for (auto bindingAttr : bindingAttrs) {
- DescriptorSetLayoutBinding setBinding;
- setBinding.ordinal = bindingAttr.getOrdinal();
- setBinding.type = bindingAttr.getType();
- setBinding.flags = bindingAttr.getFlags();
- setLayout.bindings[setBinding.ordinal] = setBinding;
- pipelineLayout.resourceMap.emplace_back(setLayout.ordinal,
- setBinding.ordinal);
- }
- pipelineLayout.setLayouts[setLayout.ordinal] = setLayout;
+ for (auto [i, bindingAttr] : llvm::enumerate(layoutAttr.getBindings())) {
+ PipelineLayoutBinding binding;
+ binding.ordinal = i;
+ binding.type = bindingAttr.getType();
+ binding.flags = bindingAttr.getFlags();
+ pipelineLayout.bindings[binding.ordinal] = binding;
+ pipelineLayout.resourceMap[i] = binding.ordinal;
}
return pipelineLayout;
@@ -174,30 +154,25 @@
}
PipelineLayout pipelineLayout;
- pipelineLayout.pushConstantCount = operandCount;
+ pipelineLayout.constantCount = operandCount;
pipelineLayout.resourceMap.resize(bindingCount);
- // TODO(#18154): simplify binding setup.
- DescriptorSetLayout setLayout;
- setLayout.ordinal = 0;
- setLayout.flags = IREE::HAL::DescriptorSetLayoutFlags::None;
- setLayout.bindings.reserve(bindingCount);
+ IREE::HAL::PipelineLayoutFlags layoutFlags =
+ IREE::HAL::PipelineLayoutFlags::None;
for (unsigned i = 0; i < bindingCount; ++i) {
const auto &descriptorInfo = descriptorInfos[i];
if (allEnumBitsSet(descriptorInfo.flags,
IREE::HAL::DescriptorFlags::Indirect)) {
- setLayout.flags =
- setLayout.flags | IREE::HAL::DescriptorSetLayoutFlags::Indirect;
+ layoutFlags = layoutFlags | IREE::HAL::PipelineLayoutFlags::Indirect;
}
- DescriptorSetLayoutBinding setBinding;
- setBinding.ordinal = setLayout.bindings.size();
- setBinding.type = IREE::HAL::DescriptorType::StorageBuffer;
- setBinding.flags = descriptorInfo.flags;
- setLayout.bindings.push_back(setBinding);
- pipelineLayout.resourceMap[i] =
- std::make_pair(setLayout.ordinal, setBinding.ordinal);
+ PipelineLayoutBinding binding;
+ binding.ordinal = i;
+ binding.type = IREE::HAL::DescriptorType::StorageBuffer;
+ binding.flags = descriptorInfo.flags;
+ pipelineLayout.bindings.push_back(binding);
+ pipelineLayout.resourceMap[i] = binding.ordinal;
}
- pipelineLayout.setLayouts.push_back(setLayout);
+ pipelineLayout.flags = layoutFlags;
LLVM_DEBUG({
auto executableOp = exportOp->getParentOfType<IREE::Stream::ExecutableOp>();
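With a single flat binding list, the assumed-layout path above becomes trivial: dispatch resource i maps onto binding ordinal i, so `resourceMap` is an identity mapping. A hypothetical layout of the kind this analysis consumes (constant count and flags are illustrative):

```mlir
// Two dispatch resources -> binding ordinals 0 and 1; resourceMap is [0, 1].
#assumed_layout = #hal.pipeline.layout<constants = 2, bindings = [
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer, Indirect>
]>
```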
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
index 7d08959..50f17d9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/Analysis/BindingLayout.h
@@ -16,35 +16,28 @@
namespace mlir::iree_compiler::IREE::HAL {
-struct DescriptorSetLayoutBinding {
+struct PipelineLayoutBinding {
- // Ordinal of the descriptor within its parent set layout.
+ // Ordinal of the binding within its parent pipeline layout.
- unsigned ordinal;
+ unsigned ordinal = 0;
// Storage type of the descriptor resource.
- IREE::HAL::DescriptorType type;
+ IREE::HAL::DescriptorType type = IREE::HAL::DescriptorType::StorageBuffer;
// Flags defining how the descriptor behaves.
- IREE::HAL::DescriptorFlags flags;
+ IREE::HAL::DescriptorFlags flags = IREE::HAL::DescriptorFlags::None;
};
-struct DescriptorSetLayout {
- // Ordinal of the set within the parent pipeline layout.
- unsigned ordinal;
- // Usage of the descriptor set (such as whether it is persistent or push).
- IREE::HAL::DescriptorSetLayoutFlags flags;
- // Bindings within the layout. Ordinals may be sparse.
- SmallVector<DescriptorSetLayoutBinding> bindings;
-};
-
-using PipelineResourceMap = SmallVector<std::pair<unsigned, unsigned>>;
+using PipelineResourceMap = SmallVector<unsigned>;
struct PipelineLayout {
- // Total number of 32-bit push constants allocated. Not all dispatchable
- // functions using this layout will use all constants.
+ // Total number of 32-bit constants allocated. Not all dispatchable
+ // functions using this layout will use all constants.
- int64_t pushConstantCount;
- // Sets bound in the layout. Ordinals may be sparse.
- SmallVector<DescriptorSetLayout> setLayouts;
- // Mapping of flattened source resource bindings into the descriptor sets.
- // Matches 1:1 with the IREE::Stream::CmdDispatchOp::resources.
+ int64_t constantCount;
+ // Bindings within the layout. Ordinals may be sparse.
+ SmallVector<PipelineLayoutBinding> bindings;
+ // Mapping of flattened source resource bindings into the pipeline layout
+ // bindings. Matches 1:1 with the IREE::Stream::CmdDispatchOp::resources.
PipelineResourceMap resourceMap;
+ // Flags defining behavior of the pipeline.
+ IREE::HAL::PipelineLayoutFlags flags = IREE::HAL::PipelineLayoutFlags::None;
void print(llvm::raw_ostream &os) const;
};
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
index 8533479..4cbccfb 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToHAL/test/pseudo_ops.mlir
@@ -1,9 +1,7 @@
// RUN: iree-opt --split-input-file --iree-hal-conversion %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @ex {
hal.executable.variant public @variant target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">) {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
index 7290cd1..87fab3f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/executable_ops.mlir
@@ -13,49 +13,6 @@
// CHECK-LABEL: @executableCreate
util.func public @executableCreate(
- // CHECK-SAME: %[[DEV:.+]]: !vm.ref<!hal.device>
- %device: !hal.device,
- // CHECK-SAME: %[[LAYOUT0:.+]]: !vm.ref<!hal.pipeline_layout>,
- %layout0: !hal.pipeline_layout,
- // CHECK-SAME: %[[LAYOUT1:.+]]: !vm.ref<!hal.pipeline_layout>
- %layout1: !hal.pipeline_layout
- ) -> (!hal.executable, !hal.executable) {
-
- // CHECK-DAG: %[[FORMAT1:.+]] = vm.rodata.inline "_utf8_format1_
- // CHECK-DAG: %[[BINARY1:.+]] = vm.rodata.inline "exe_binary1" {alignment = 16 : i64} : !vm.buffer = dense<[0, 1, 2, 3]> : vector<4xi8>
- // CHECK-DAG: %[[NULL1:.+]] = vm.const.ref.zero : !vm.buffer
- // CHECK: %[[EXE1:.+]] = vm.call.variadic @hal.executable.create(
- // CHECK-SAME: %[[DEV]], %[[FORMAT1]], %[[BINARY1]], %[[NULL1]], [%[[LAYOUT0]], %[[LAYOUT1]]]
- // CHECK-SAME: ) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.pipeline_layout> ...) -> !vm.ref<!hal.executable>
- %0 = hal.executable.create device(%device : !hal.device) target(@exe::@binary1) layouts([%layout0, %layout1]) : !hal.executable
-
- // CHECK-DAG: %[[FORMAT2:.+]] = vm.rodata.inline "_utf8_format2_
- // CHECK-DAG: %[[BINARY2:.+]] = vm.rodata.inline "exe_binary2" {alignment = 16 : i64} : !vm.buffer = dense<[4, 5, 6, 7]> : vector<4xi8>
- // CHECK-DAG: %[[NULL2:.+]] = vm.const.ref.zero : !vm.buffer
- // CHECK: %[[EXE2:.+]] = vm.call.variadic @hal.executable.create(
- // CHECK-SAME: %[[DEV]], %[[FORMAT2]], %[[BINARY2]], %[[NULL2]], [%[[LAYOUT1]], %[[LAYOUT0]]]
- // CHECK-SAME: ) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.pipeline_layout> ...) -> !vm.ref<!hal.executable>
- %1 = hal.executable.create device(%device : !hal.device) target(@exe::@binary2) layouts([%layout1, %layout0]) : !hal.executable
-
- // CHECK: vm.return %[[EXE1]], %[[EXE2]]
- util.return %0, %1 : !hal.executable, !hal.executable
-}
-
-// -----
-
-hal.executable @exe {
- hal.executable.binary @binary1 attributes {
- data = dense<[0, 1, 2, 3]> : vector<4xi8>,
- format = "format1"
- }
- hal.executable.binary @binary2 attributes {
- data = dense<[4, 5, 6, 7]> : vector<4xi8>,
- format = "format2"
- }
-}
-
-// CHECK-LABEL: @executableCreate
-util.func public @executableCreate(
// CHECK-SAME: %[[DEV:.+]]: !vm.ref<!hal.device>
%device: !hal.device
) -> (!hal.executable, !hal.executable) {
@@ -97,16 +54,14 @@
// CHECK-LABEL: @multipleExecutables
util.func public @multipleExecutables(
- %device: !hal.device,
- %layout0: !hal.pipeline_layout,
- %layout1: !hal.pipeline_layout
+ %device: !hal.device
) -> (!hal.executable, !hal.executable) {
// CHECK-DAG: %[[FORMAT1:.+]] = vm.rodata.inline "_utf8_format_
// CHECK-DAG: %[[BINARY1:.+]] = vm.rodata.inline "exe1_binary1" {alignment = 16 : i64} : !vm.buffer = dense<[0, 1, 2, 3]> : vector<4xi8>
- %0 = hal.executable.create device(%device : !hal.device) target(@exe1::@binary1) layouts([%layout0, %layout1]) : !hal.executable
+ %0 = hal.executable.create device(%device : !hal.device) target(@exe1::@binary1) : !hal.executable
// CHECK-DAG: %[[FORMAT2:.+]] = vm.rodata.inline "_utf8_format_
// CHECK-DAG: %[[BINARY2:.+]] = vm.rodata.inline "exe2_binary2" {alignment = 16 : i64} : !vm.buffer = dense<[4, 5, 6, 7]> : vector<4xi8>
- %1 = hal.executable.create device(%device : !hal.device) target(@exe2::@binary2) layouts([%layout1, %layout0]) : !hal.executable
+ %1 = hal.executable.create device(%device : !hal.device) target(@exe2::@binary2) : !hal.executable
util.return %0, %1 : !hal.executable, !hal.executable
}
@@ -123,8 +78,6 @@
util.func public @executableConstants(
// CHECK-SAME: %[[DEV:.+]]: !vm.ref<!hal.device>
%device: !hal.device,
- // CHECK-SAME: %[[LAYOUT:.+]]: !vm.ref<!hal.pipeline_layout>
- %layout: !hal.pipeline_layout,
// CHECK-SAME: %[[CONSTANT0:.+]]: i32, %[[CONSTANT1:.+]]: i32
%constant0: i32, %constant1: i32
) -> !hal.executable {
@@ -141,13 +94,12 @@
// CHECK-DAG: %[[INDEX2:.+]] = vm.const.i64 2
// CHECK-DAG: vm.buffer.store.i32 %[[CONSTANT1]], %[[CONSTANTS]][%[[INDEX2]]] : i32 -> !vm.buffer
- // CHECK: %[[EXE:.+]] = vm.call.variadic @hal.executable.create(
- // CHECK-SAME: %[[DEV]], %[[FORMAT]], %[[BINARY]], %[[CONSTANTS]], [%[[LAYOUT]]]
- // CHECK-SAME: ) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.pipeline_layout> ...) -> !vm.ref<!hal.executable>
+ // CHECK: %[[EXE:.+]] = vm.call @hal.executable.create(
+ // CHECK-SAME: %[[DEV]], %[[FORMAT]], %[[BINARY]], %[[CONSTANTS]]
+ // CHECK-SAME: ) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer) -> !vm.ref<!hal.executable>
%0 = hal.executable.create
device(%device : !hal.device)
target(@exe::@binary)
- layouts([%layout])
constants([%constant0, %c0, %constant1]) : !hal.executable
// CHECK: vm.return %[[EXE]]
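At the VM level the layout operands simply disappear from executable creation; a before/after sketch (symbol and SSA names are illustrative):

```mlir
// Before: pipeline layouts were threaded through creation.
//   %exe = hal.executable.create device(%device : !hal.device)
//              target(@exe::@binary) layouts([%layout0, %layout1]) : !hal.executable
// After: the executable is created from the device and target alone.
%exe = hal.executable.create device(%device : !hal.device)
           target(@exe::@binary) : !hal.executable
```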
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
index 25567af..8270b21 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/Patterns.cpp
@@ -762,15 +762,10 @@
Value ordinal = builder.create<IREE::HAL::ExecutableExportOrdinalOp>(
loc, builder.getIndexType(), entryPointAttr);
- // TODO(#18154): simplify bindings by removing descriptor sets.
auto layoutAttr = exportOp.getLayout();
- auto bindingAttrs = IREE::HAL::getInterfaceBindingAttrs(
- exportOp, dispatchOp.getResources().size());
SmallVector<IREE::HAL::BindingValue> bindings;
- for (auto [i, bindingAttr] : llvm::enumerate(bindingAttrs)) {
- auto descriptorFlags = layoutAttr.getSetLayout(bindingAttr.getSet())
- .getBinding(bindingAttr.getBinding())
- .getFlags();
+ for (auto [i, bindingAttr] : llvm::enumerate(layoutAttr.getBindings())) {
+ auto descriptorFlags = bindingAttr.getFlags();
IREE::HAL::BindingValue binding;
if (bitEnumContainsAll(descriptorFlags,
IREE::HAL::DescriptorFlags::Indirect)) {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
index c764fa8..ece8202 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Conversion/StreamToHAL/test/cmd_ops.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --allow-unregistered-dialect --iree-hal-conversion %s | FileCheck %s
+// RUN: iree-opt --split-input-file --allow-unregistered-dialect --iree-hal-conversion --cse --iree-hal-indirect-command-buffers=true %s | FileCheck %s
// Today all memory control operations are ignored and we're just left with
// the normal sequential execution barriers.
@@ -32,7 +32,7 @@
%c128 = arith.constant 128 : index
%c255_i32 = arith.constant 255 : i32
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
- %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg2: !stream.resource<transient>{%arg1}) {
+ %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg2: !stream.resource<transient>{%arg1}) {
// CHECK-NEXT: hal.command_buffer.fill_buffer<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%arg0 : !hal.buffer)[%c0, %c128]
// CHECK-SAME: pattern(%c255_i32 : i32)
@@ -52,7 +52,7 @@
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
- %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg4: !stream.resource<transient>{%arg1}, %arg2 as %arg5: !stream.resource<staging>{%arg3}) {
+ %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg4: !stream.resource<transient>{%arg1}, %arg2 as %arg5: !stream.resource<staging>{%arg3}) {
// CHECK-NEXT: hal.command_buffer.copy_buffer<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: source(%arg0 : !hal.buffer)[%c0]
// CHECK-SAME: target(%arg2 : !hal.buffer)[%c0]
@@ -73,7 +73,7 @@
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
- %0 = stream.cmd.execute on(#hal.device.affinity<@device>) with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<transient>{%arg3}) {
+ %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<transient>{%arg3}) {
// Out-of-place all-reduce:
// CHECK-NEXT: hal.command_buffer.collective
@@ -142,7 +142,7 @@
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
- %0 = stream.cmd.execute on(#hal.device.affinity<@device>) await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
+ %0 = stream.cmd.execute once on(#hal.device.affinity<@device>) await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
stream.cmd.concurrent {
// CHECK-NEXT: hal.command_buffer.copy_buffer<%[[CMD]]
stream.cmd.copy %arg5[%c0], %arg6[%c0], %c128 : !stream.resource<transient>{%arg1} -> !stream.resource<staging>{%arg3}
@@ -176,11 +176,9 @@
#executable_target_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64">
#executable_target_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer, Indirect>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer, Indirect>
]>
hal.executable private @ex {
hal.executable.variant public @aarch64 target(#executable_target_aarch64) {
@@ -328,7 +326,7 @@
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
- %0 = stream.cmd.execute on(#hal.device.affinity<@device, [0, 1]>) await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
+ %0 = stream.cmd.execute once on(#hal.device.affinity<@device, [0, 1]>) await(%arg4) => with(%arg0 as %arg5: !stream.resource<transient>{%arg1}, %arg2 as %arg6: !stream.resource<staging>{%arg3}) {
stream.cmd.copy %arg5[%c0], %arg6[%c0], %c128 : !stream.resource<transient>{%arg1} -> !stream.resource<staging>{%arg3}
} => !stream.timepoint
// CHECK: hal.device.queue.execute
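The Stream-to-HAL lowering now walks `layoutAttr.getBindings()` in order and, per the pattern change above, selects a direct buffer or a binding-table slot based on each binding's `Indirect` flag. A hypothetical lowered dispatch (SSA names and ranges are illustrative):

```mlir
hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
    target(%executable : !hal.executable)[%ordinal]
    workgroups([%x, %y, %z])
    bindings([
      (%buffer : !hal.buffer)[%c0, %c8000],  // binding 0: direct buffer reference
      (%slot : index)[%c0, %c4096]           // binding 1: Indirect -> binding table slot
    ])
    flags("None")
```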
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp
index 4869771..95cf53e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.cpp
@@ -86,45 +86,13 @@
}
//===----------------------------------------------------------------------===//
-// hal.descriptor_set.layout<*>
+// #hal.pipeline.layout<*>
//===----------------------------------------------------------------------===//
-DescriptorSetBindingAttr
-DescriptorSetLayoutAttr::getBinding(int64_t ordinal) const {
- for (auto binding : getBindings()) {
- if (binding.getOrdinal() == ordinal) {
- return binding;
- }
- }
- return {};
-}
-
-//===----------------------------------------------------------------------===//
-// hal.pipeline.layout<*>
-//===----------------------------------------------------------------------===//
-
-DescriptorSetLayoutAttr
-PipelineLayoutAttr::getSetLayout(int64_t ordinal) const {
- for (auto setLayout : getSetLayouts()) {
- if (setLayout.getOrdinal() == ordinal) {
- return setLayout;
- }
- }
- return {};
-}
-
-int64_t PipelineLayoutAttr::getFlatBindingIndex(int64_t set,
- int64_t binding) const {
- int64_t flatIndex = 0;
- for (auto setLayoutAttr : getSetLayouts()) {
- if (setLayoutAttr.getOrdinal() == set) {
- flatIndex += binding;
- break;
- } else {
- flatIndex += setLayoutAttr.getBindings().size();
- }
- }
- return flatIndex;
+PipelineBindingAttr PipelineLayoutAttr::getBinding(int64_t ordinal) const {
+ assert(ordinal >= 0 && ordinal < getBindings().size() &&
+ "binding ordinal out of bounds");
+ return getBindings()[ordinal];
}
//===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td
index 8d463aa..976a4ca 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td
@@ -178,12 +178,12 @@
let cppNamespace = "::mlir::iree_compiler::IREE::HAL";
}
-def HAL_DescriptorSetLayoutFlags_None : I32BitEnumAttrCase<"None", 0x0000>;
-def HAL_DescriptorSetLayoutFlags_Indirect : I32BitEnumAttrCase<"Indirect", 0x0001>;
-def HAL_DescriptorSetLayoutFlagsAttr :
- I32BitEnumAttr<"DescriptorSetLayoutFlags", "valid DescriptorSetLayout flags", [
- HAL_DescriptorSetLayoutFlags_None,
- HAL_DescriptorSetLayoutFlags_Indirect,
+def HAL_PipelineLayoutFlags_None : I32BitEnumAttrCase<"None", 0x0000>;
+def HAL_PipelineLayoutFlags_Indirect : I32BitEnumAttrCase<"Indirect", 0x0001>;
+def HAL_PipelineLayoutFlagsAttr :
+ I32BitEnumAttr<"PipelineLayoutFlags", "valid PipelineLayout flags", [
+ HAL_PipelineLayoutFlags_None,
+ HAL_PipelineLayoutFlags_Indirect,
]> {
let cppNamespace = "::mlir::iree_compiler::IREE::HAL";
}
@@ -376,57 +376,22 @@
}
//===----------------------------------------------------------------------===//
-// hal.descriptor_set.binding<*>
+// hal.pipeline.binding<*>
//===----------------------------------------------------------------------===//
-def HAL_DescriptorSetBindingAttr :
- AttrDef<HAL_Dialect, "DescriptorSetBinding", []> {
- let mnemonic = "descriptor_set.binding";
- let summary = [{descriptor set binding specification}];
+def HAL_PipelineBindingAttr :
+ AttrDef<HAL_Dialect, "PipelineBinding", []> {
+ let mnemonic = "pipeline.binding";
+ let summary = [{pipeline binding specification}];
let description = [{
- Specifies a single binding within a descriptor set layout.
+ Specifies a single binding within a pipeline layout.
}];
let parameters = (ins
- AttrParameter<"int64_t", "">:$ordinal,
AttrParameter<"DescriptorType", "">:$type,
OptionalParameter<"DescriptorFlags", "DescriptorFlags::None">:$flags
);
let assemblyFormat = [{
- `<` $ordinal `,` $type (`,` $flags^)? `>`
- }];
-}
-
-def HAL_DescriptorSetLayoutBindingArrayAttr :
- TypedArrayAttrBase<HAL_DescriptorSetBindingAttr,
- "HAL descriptor set layout binding array attribute">;
-
-//===----------------------------------------------------------------------===//
-// hal.descriptor_set.layout<*>
-//===----------------------------------------------------------------------===//
-
-def HAL_DescriptorSetLayoutAttr :
- AttrDef<HAL_Dialect, "DescriptorSetLayout", []> {
- let mnemonic = "descriptor_set.layout";
- let summary = [{descriptor set layout specification}];
- let description = [{
- Specifies the layout information of a single set of descriptors used within
- an pipeline layout. Multiple of these sets may be used by a single entry
- point to allow for bindings with similar update frequencies to be grouped.
- }];
- let parameters = (ins
- AttrParameter<"int64_t", "">:$ordinal,
- ArrayRefParameter<"DescriptorSetBindingAttr", "">:$bindings,
- OptionalParameter<"std::optional<DescriptorSetLayoutFlags>">:$flags
- );
- let assemblyFormat = [{
- `<`
- $ordinal `,`
- `bindings` `=` `[` $bindings `]`
- (`,` `flags` `=` $flags^)?
- `>`
- }];
- let extraClassDeclaration = [{
- DescriptorSetBindingAttr getBinding(int64_t ordinal) const;
+ `<` $type (`,` $flags^)? `>`
}];
}
@@ -444,56 +409,26 @@
lower-level target-specific argument passing behavior.
}];
let parameters = (ins
- AttrParameter<"int64_t", "">:$pushConstants,
- ArrayRefParameter<"DescriptorSetLayoutAttr", "">:$setLayouts
+ ArrayRefParameter<"PipelineBindingAttr", "">:$bindings,
+ OptionalParameter<"int64_t", "0">:$constants,
+ OptionalParameter<"std::optional<PipelineLayoutFlags>">:$flags
);
let assemblyFormat = [{
`<`
- `push_constants` `=` $pushConstants `,`
- `sets` `=` `[` $setLayouts `]`
+ (`constants` `=` $constants^ `,` ` `)?
+ `bindings` `=` `[` qualified($bindings) `]`
+ (`,` `flags` `=` $flags^)?
`>`
}];
let extraClassDeclaration = [{
- DescriptorSetLayoutAttr getSetLayout(int64_t ordinal) const;
-
- // Returns the binding index in a flattened list of all sets and bindings.
- // For example, if the layout is [set(bindings[4]), set(bindings[2])] then
- // a query for set 1 binding 0 would return 4.
- int64_t getFlatBindingIndex(int64_t set, int64_t binding) const;
+ IREE::HAL::PipelineBindingAttr getBinding(int64_t ordinal) const;
+ IREE::HAL::PipelineBindingAttr getBinding(APInt ordinal) const {
+ return getBinding(ordinal.getSExtValue());
+ }
}];
}
//===----------------------------------------------------------------------===//
-// hal.interface.binding<*>
-//===----------------------------------------------------------------------===//
-
-def HAL_InterfaceBindingAttr :
- AttrDef<HAL_Dialect, "InterfaceBinding", []> {
- let mnemonic = "interface.binding";
- let summary = [{interface binding specification}];
- let description = [{
- Specifies the descriptor set and binding ordinal of a particular layout
- binding.
-
- Example:
- ```mlir
- #hal.interface.binding<0, 1>
- ```
- }];
- let parameters = (ins
- AttrParameter<"int64_t", "">:$set,
- AttrParameter<"int64_t", "">:$binding
- );
- let assemblyFormat = [{
- `<` $set `,` $binding `>`
- }];
-}
-
-def HAL_InterfaceBindingArrayAttr :
- TypedArrayAttrBase<HAL_InterfaceBindingAttr,
- "HAL binding array attribute">;
-
-//===----------------------------------------------------------------------===//
// #hal.executable.target<*>
//===----------------------------------------------------------------------===//
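In the revised grammar `constants` and `flags` are optional around the required binding list; two hypothetical instances (values arbitrary):

```mlir
#layout_minimal = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer>
]>
#layout_full = #hal.pipeline.layout<constants = 4, bindings = [
  #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">,
  #hal.pipeline.binding<uniform_buffer>
], flags = Indirect>
```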
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
index 811789c..00c2c6e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
@@ -100,9 +100,7 @@
MLIRContext *context = attr.getContext();
// TODO(benvanik): remove this interface or make it an attr interface.
if (auto bindingAttr =
- llvm::dyn_cast<IREE::HAL::DescriptorSetBindingAttr>(attr)) {
- fn(IntegerAttr::get(IndexType::get(context),
- APInt(64, bindingAttr.getOrdinal())));
+ llvm::dyn_cast<IREE::HAL::PipelineBindingAttr>(attr)) {
fn(IREE::HAL::DescriptorTypeAttr::get(context, bindingAttr.getType()));
fn(IREE::HAL::DescriptorFlagsAttr::get(context, bindingAttr.getFlags()));
return success();
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
index 36eaac4..45dbb15 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOpFolders.cpp
@@ -188,7 +188,7 @@
/// the same scope.
struct SkipCommandBufferDeviceOp
: public OpRewritePattern<CommandBufferDeviceOp> {
- using OpRewritePattern<CommandBufferDeviceOp>::OpRewritePattern;
+ using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(CommandBufferDeviceOp op,
PatternRewriter &rewriter) const override {
@@ -340,6 +340,96 @@
results.insert<FoldCommandBufferCopyBufferSubspans>(context);
}
+namespace {
+
+/// Folds hal.buffer.subspans into dispatch bindings.
+/// The binding range is always equal to or a subset of the subspan.
+template <typename OpT>
+struct FoldCommandBufferDispatchBufferSubspan : public OpRewritePattern<OpT> {
+ using OpRewritePattern<OpT>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(OpT op,
+ PatternRewriter &rewriter) const override {
+ auto ip = rewriter.saveInsertionPoint();
+ rewriter.setInsertionPoint(op);
+ bool needsUpdate = false;
+ auto bindingBuffers = llvm::to_vector(op.getBindingBuffers());
+ auto bindingOffsets = llvm::to_vector(op.getBindingOffsets());
+ for (size_t i = 0; i < bindingBuffers.size(); ++i) {
+ auto *definingOp = bindingBuffers[i].getDefiningOp();
+ if (!definingOp)
+ continue;
+ if (auto subspanOp = dyn_cast<IREE::HAL::BufferSubspanOp>(definingOp)) {
+ needsUpdate = true;
+ bindingBuffers[i] = subspanOp.getSourceBuffer();
+ bindingOffsets[i] = rewriter.createOrFold<arith::AddIOp>(
+ subspanOp.getLoc(), subspanOp.getSourceOffset(), bindingOffsets[i]);
+ }
+ }
+ rewriter.restoreInsertionPoint(ip);
+ if (!needsUpdate)
+ return failure();
+ rewriter.modifyOpInPlace(op, [&]() {
+ auto mutableBindingBuffers = op.getBindingBuffersMutable();
+ mutableBindingBuffers.clear();
+ mutableBindingBuffers.append(bindingBuffers);
+ auto mutableBindingOffsets = op.getBindingOffsetsMutable();
+ mutableBindingOffsets.clear();
+ mutableBindingOffsets.append(bindingOffsets);
+ });
+ return success();
+ }
+};
+
+} // namespace
+
+void CommandBufferDispatchOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results
+ .insert<FoldCommandBufferDispatchBufferSubspan<CommandBufferDispatchOp>>(
+ context);
+}
+
+namespace {
+
+/// Folds hal.buffer.subspans into the indirect dispatch workgroup count.
+/// The workgroups buffer range is always equal to or a subset of the subspan.
+struct FoldCommandBufferDispatchIndirectBufferSubspan
+ : public OpRewritePattern<CommandBufferDispatchIndirectOp> {
+ using OpRewritePattern::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(CommandBufferDispatchIndirectOp op,
+ PatternRewriter &rewriter) const override {
+ Value workgroupsBuffer = op.getWorkgroupsBuffer();
+ auto *definingOp = workgroupsBuffer.getDefiningOp();
+ if (!definingOp)
+ return failure();
+ Value workgroupsOffset = op.getWorkgroupsOffset();
+ if (auto subspanOp = dyn_cast<IREE::HAL::BufferSubspanOp>(definingOp)) {
+ workgroupsBuffer = subspanOp.getSourceBuffer();
+ workgroupsOffset = rewriter.createOrFold<arith::AddIOp>(
+ subspanOp.getLoc(), subspanOp.getSourceOffset(), workgroupsOffset);
+ } else {
+ return failure();
+ }
+ rewriter.modifyOpInPlace(op, [&]() {
+ op.getWorkgroupsBufferMutable().set(workgroupsBuffer);
+ op.getWorkgroupsOffsetMutable().set(workgroupsOffset);
+ });
+ return success();
+ }
+};
+
+} // namespace
+
+void CommandBufferDispatchIndirectOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results.insert<FoldCommandBufferDispatchIndirectBufferSubspan>(context);
+ results.insert<
+ FoldCommandBufferDispatchBufferSubspan<CommandBufferDispatchIndirectOp>>(
+ context);
+}
+
//===----------------------------------------------------------------------===//
// hal.device.queue.execute
//===----------------------------------------------------------------------===//
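A concrete before/after for the new dispatch folders, mirroring the folding tests further below (constants and names are illustrative):

```mlir
// Before canonicalization: the dispatch binds a subspan of %buffer.
%subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c4096, %c262144] : !hal.buffer
hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
    target(%executable : !hal.executable)[%c0]
    workgroups([%c1, %c1, %c1])
    bindings([
      (%subspan : !hal.buffer)[%c0, %c8000]
    ])
    flags("None")
// After canonicalization: the subspan source and offset fold into the binding range:
//   (%buffer : !hal.buffer)[%c4096, %c8000]
```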
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
index 09581e8..4485011 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
@@ -47,14 +47,14 @@
}
//===----------------------------------------------------------------------===//
-// custom<DescriptorSetBindings>($binding_ordinals,
+// custom<PipelineBindings>($binding_ordinals,
// $binding_buffers,
// type($binding_buffers),
// $binding_offsets,
// $binding_lengths)
//===----------------------------------------------------------------------===//
-static ParseResult parseDescriptorSetBindings(
+static ParseResult parsePipelineBindings(
OpAsmParser &parser,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &ordinals,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &buffers,
@@ -86,11 +86,11 @@
return success();
}
-static void printDescriptorSetBindings(OpAsmPrinter &p, Operation *op,
- ValueRange ordinals, ValueRange buffers,
- TypeRange bufferTypes,
- ValueRange bufferOffsets,
- ValueRange bufferLengths) {
+static void printPipelineBindings(OpAsmPrinter &p, Operation *op,
+ ValueRange ordinals, ValueRange buffers,
+ TypeRange bufferTypes,
+ ValueRange bufferOffsets,
+ ValueRange bufferLengths) {
llvm::interleaveComma(llvm::zip_equal(ordinals, buffers, bufferTypes,
bufferOffsets, bufferLengths),
p,
@@ -1887,7 +1887,7 @@
LogicalResult InterfaceConstantLoadOp::verify() {
InterfaceConstantLoadOp op = *this;
auto layoutAttr = op.getLayout();
- if (op.getOrdinal().getZExtValue() >= layoutAttr.getPushConstants()) {
+ if (op.getOrdinal().getZExtValue() >= layoutAttr.getConstants()) {
return op.emitOpError("push constant ordinal out of bounds");
}
return success();
@@ -1897,18 +1897,20 @@
// hal.interface.binding.subspan
//===----------------------------------------------------------------------===//
-void InterfaceBindingSubspanOp::build(
- OpBuilder &builder, OperationState &result, Type resultType,
- IREE::HAL::PipelineLayoutAttr layout, APInt set, APInt binding,
- Value byte_offset, ValueRange dynamic_dims, IntegerAttr alignment,
- std::optional<DescriptorFlags> flags) {
+void InterfaceBindingSubspanOp::build(OpBuilder &builder,
+ OperationState &result, Type resultType,
+ IREE::HAL::PipelineLayoutAttr layout,
+ APInt binding, Value byte_offset,
+ ValueRange dynamic_dims,
+ IntegerAttr alignment,
+ std::optional<DescriptorFlags> flags) {
IREE::HAL::DescriptorFlagsAttr descriptorAttr;
if (flags.has_value()) {
descriptorAttr = IREE::HAL::DescriptorFlagsAttr::get(builder.getContext(),
flags.value());
}
- build(builder, result, resultType, layout, set, binding, byte_offset,
- dynamic_dims, alignment, descriptorAttr);
+ build(builder, result, resultType, layout, binding, byte_offset, dynamic_dims,
+ alignment, descriptorAttr);
}
LogicalResult InterfaceBindingSubspanOp::verify() {
@@ -1921,58 +1923,24 @@
<< " associated dimension SSA values";
}
}
- int64_t set = op.getSet().getSExtValue();
- int64_t binding = op.getBinding().getSExtValue();
- bool foundSet = false;
- bool foundBinding = false;
- for (auto setLayoutAttr : op.getLayout().getSetLayouts()) {
- if (setLayoutAttr.getOrdinal() == set) {
- foundSet = true;
- for (auto bindingAttr : setLayoutAttr.getBindings()) {
- if (bindingAttr.getOrdinal() == binding) {
- foundBinding = true;
- break;
- }
- }
- }
- }
- if (!foundSet) {
- return op.emitOpError("set ordinal ")
- << set << " not present in pipeline layout";
- } else if (!foundBinding) {
+ uint64_t binding = op.getBinding().getZExtValue();
+ if (binding >= op.getLayout().getBindings().size()) {
return op.emitOpError("binding ordinal ")
- << binding << " not present in descriptor set layout";
+ << binding << " out of bounds in layout " << op.getLayout();
}
return success();
}
-IREE::HAL::DescriptorSetBindingAttr
-InterfaceBindingSubspanOp::getDescriptorSetBindingAttr() {
- int64_t set = getSet().getSExtValue();
- int64_t binding = getBinding().getSExtValue();
- for (auto setLayoutAttr : getLayout().getSetLayouts()) {
- if (setLayoutAttr.getOrdinal() == set) {
- for (auto bindingAttr : setLayoutAttr.getBindings()) {
- if (bindingAttr.getOrdinal() == binding) {
- return bindingAttr;
- }
- }
- }
- }
- return {};
+IREE::HAL::PipelineBindingAttr
+InterfaceBindingSubspanOp::getPipelineBindingAttr() {
+ return getLayout().getBinding(getBinding());
}
IREE::HAL::DescriptorType InterfaceBindingSubspanOp::getDescriptorType() {
- auto bindingAttr = getDescriptorSetBindingAttr();
+ auto bindingAttr = getPipelineBindingAttr();
return bindingAttr.getType();
}
-int64_t InterfaceBindingSubspanOp::getFlatBindingIndex() {
- int64_t set = getSet().getSExtValue();
- int64_t binding = getBinding().getSExtValue();
- return getLayout().getFlatBindingIndex(set, binding);
-}
-
llvm::MaybeAlign InterfaceBindingSubspanOp::getBaseAlignment() {
if (auto baseAlignmentInt = getAlignment()) {
return llvm::MaybeAlign(baseAlignmentInt.value().getZExtValue());
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
index d449467..fdd43b7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
@@ -429,7 +429,6 @@
OptionalAttr<HAL_WorkgroupSizeAttr>:$workgroup_size,
OptionalAttr<HAL_SubgroupSizeAttr>:$subgroup_size,
OptionalAttr<IndexAttr>:$workgroup_local_memory,
- OptionalAttr<HAL_InterfaceBindingArrayAttr>:$bindings,
OptionalAttr<Util_TiedOpStorageAttr>:$tied_operands
);
let results = (outs
@@ -451,7 +450,6 @@
$tied_operands)
`count` `` custom<WorkgroupCountRegion>($workgroup_count)
`layout` `(` $layout `)`
- (`bindings` `(` $bindings^ `)`)?
`objects` `(` `{` custom<TargetConditionObjects>($targets,
$target_ordinals,
$target_objects,
@@ -1532,6 +1530,7 @@
)>,
];
+ let hasCanonicalizer = 1;
let hasVerifier = 1;
}
@@ -1599,6 +1598,7 @@
)>,
];
+ let hasCanonicalizer = 1;
let hasVerifier = 1;
}
@@ -3059,7 +3059,6 @@
let arguments = (ins
HAL_PipelineLayoutAttr:$layout,
- IndexAttr:$set,
IndexAttr:$binding,
Optional<HAL_DeviceSize>:$byte_offset,
HAL_ShapeDynamicDims:$dynamic_dims,
@@ -3072,7 +3071,6 @@
let assemblyFormat = [{
`layout` `(` $layout `)`
- `set` `(` $set `)`
`binding` `(` $binding `)`
(`alignment` `(` $alignment^ `)`)?
(`offset` `(` $byte_offset^ `)`)?
@@ -3084,7 +3082,6 @@
OpBuilder<(ins
"Type":$resultType,
"IREE::HAL::PipelineLayoutAttr":$layout,
- "APInt":$set,
"APInt":$binding,
"Value":$byte_offset,
"ValueRange":$dynamic_dims,
@@ -3100,16 +3097,11 @@
ValueRange getResultDynamicDims(unsigned idx) { return getDynamicDims(); }
- // Returns the descriptor set binding metadata for the given set/binding.
+ // Returns the pipeline binding metadata for the given binding ordinal.
- IREE::HAL::DescriptorSetBindingAttr getDescriptorSetBindingAttr();
+ IREE::HAL::PipelineBindingAttr getPipelineBindingAttr();
// Returns the type of the descriptor this binding references.
IREE::HAL::DescriptorType getDescriptorType();
- // Returns the binding index in a flattened list of all sets and bindings.
- // For example, if the layout is [set(bindings[4]), set(bindings[2])] then
- // a query for set 1 binding 0 would return 4.
- int64_t getFlatBindingIndex();
-
// Returns the alignment of the base buffer pointer (before offset).
llvm::MaybeAlign getBaseAlignment();
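With `set` gone from `hal.interface.binding.subspan`, kernel-side accesses reference a layout binding by its implicit ordinal alone; a minimal sketch (layout, types, and alignment are illustrative, not taken from this diff):

```mlir
#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
  #hal.pipeline.binding<storage_buffer, ReadOnly>,
  #hal.pipeline.binding<storage_buffer>
]>
func.func @kernel() {
  %c0 = arith.constant 0 : index
  %in = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<16xf32>
  %out = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<16xf32>
  return
}
```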
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
index 1903bc8..9e31af7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.cpp
@@ -90,26 +90,6 @@
// Utilities
//===----------------------------------------------------------------------===//
-SmallVector<IREE::HAL::InterfaceBindingAttr>
-getInterfaceBindingAttrs(Operation *op, size_t resourceCount) {
- // It'd be nice if we had something typed here but this is just used for
- // spooky action at a distance or user overrides. If the attribute is not
- // found (not set by MaterializeInterfaces or the user) we construct one by
- // convention (dense set 0 bindings for each resource).
- auto bindingAttrs = op->getAttrOfType<ArrayAttr>("hal.interface.bindings");
- if (bindingAttrs) {
- return llvm::to_vector(
- bindingAttrs.getAsRange<IREE::HAL::InterfaceBindingAttr>());
- }
- SmallVector<IREE::HAL::InterfaceBindingAttr> bindings;
- for (size_t i = 0; i < resourceCount; ++i) {
- bindings.push_back(IREE::HAL::InterfaceBindingAttr::get(op->getContext(),
- /*set=*/0,
- /*binding=*/i));
- }
- return bindings;
-}
-
//===----------------------------------------------------------------------===//
// Dialect registration
//===----------------------------------------------------------------------===//
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
index 543c26d..fefe68f 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALTypes.h
@@ -164,7 +164,7 @@
- // A tuple containing runtime values for a descriptor set binding.
+ // A tuple containing runtime values for a pipeline binding.
// The buffer specified may be either a !hal.buffer or an index of a binding
// table slot to source the buffer from.
-struct DescriptorSetBindingValue {
+struct PipelineBindingValue {
Value ordinal;
Value buffer;
Value byteOffset;
@@ -220,14 +220,14 @@
template <>
struct FieldParser<
- std::optional<mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlags>> {
- static FailureOr<mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlags>
+ std::optional<mlir::iree_compiler::IREE::HAL::PipelineLayoutFlags>> {
+ static FailureOr<mlir::iree_compiler::IREE::HAL::PipelineLayoutFlags>
parse(AsmParser &parser) {
std::string value;
if (parser.parseKeywordOrString(&value))
return failure();
auto result = mlir::iree_compiler::IREE::HAL::symbolizeEnum<
- mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlags>(value);
+ mlir::iree_compiler::IREE::HAL::PipelineLayoutFlags>(value);
if (!result.has_value())
return failure();
return result.value();
@@ -235,8 +235,7 @@
};
static inline AsmPrinter &operator<<(
AsmPrinter &printer,
- std::optional<mlir::iree_compiler::IREE::HAL::DescriptorSetLayoutFlags>
- param) {
+ std::optional<mlir::iree_compiler::IREE::HAL::PipelineLayoutFlags> param) {
printer << (param.has_value()
? mlir::iree_compiler::IREE::HAL::stringifyEnum(param.value())
: StringRef{""});
@@ -287,11 +286,6 @@
namespace mlir::iree_compiler::IREE::HAL {
-// Returns the assigned bindings via the `hal.interface.bindings` attribute
-// on the operation or default bindings in set 0 with bindings [0, count).
-SmallVector<IREE::HAL::InterfaceBindingAttr>
-getInterfaceBindingAttrs(Operation *op, size_t resourceCount);
-
} // namespace mlir::iree_compiler::IREE::HAL
#endif // IREE_COMPILER_DIALECT_HAL_IR_HALTYPES_H_
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel
index 8c88e20..a111296 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/BUILD.bazel
@@ -24,7 +24,6 @@
"channel_ops.mlir",
"command_buffer_folding.mlir",
"command_buffer_ops.mlir",
- "descriptor_set_ops.mlir",
"device_folding.mlir",
"device_ops.mlir",
"devices_ops.mlir",
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt
index f1fb349..e2d654b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/CMakeLists.txt
@@ -22,7 +22,6 @@
"channel_ops.mlir"
"command_buffer_folding.mlir"
"command_buffer_ops.mlir"
- "descriptor_set_ops.mlir"
"device_folding.mlir"
"device_ops.mlir"
"devices_ops.mlir"
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir
index d04c7d6..0e7dd9e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/attributes.mlir
@@ -3,30 +3,26 @@
// CHECK-LABEL: descriptor_set_layout_binding.basic
"descriptor_set_layout_binding.basic"() {
- // CHECK: dslb0 = #hal.descriptor_set.binding<0, uniform_buffer>
- dslb0 = #hal.descriptor_set.binding<0, uniform_buffer>,
- // CHECK: dslb1 = #hal.descriptor_set.binding<1, storage_buffer, "ReadOnly|Indirect">
- dslb1 = #hal.descriptor_set.binding<1, storage_buffer, "ReadOnly|Indirect">
+ // CHECK: dslb0 = #hal.pipeline.binding<uniform_buffer>
+ dslb0 = #hal.pipeline.binding<uniform_buffer>,
+ // CHECK: dslb1 = #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">
+ dslb1 = #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">
} : () -> ()
// -----
// CHECK-LABEL: pipeline_layout.basic
"pipeline_layout.basic"() {
- // CHECK: layout0 = #hal.pipeline.layout<push_constants = 4, sets = [
- // CHECK-SAME: <0, bindings = [
- // CHECK-SAME: <0, storage_buffer>
- // CHECK-SAME: <1, storage_buffer>
- // CHECK-SAME: <1, bindings = [
- // CHECK-SAME: <0, uniform_buffer>
- layout0 = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<0, uniform_buffer>
- ]>
+ // CHECK: layout0 = #hal.pipeline.layout<
+ // CHECK-SAME: constants = 4
+ // CHECK-SAME: bindings = [
+ // CHECK-SAME: #hal.pipeline.binding<storage_buffer>,
+ // CHECK-SAME: #hal.pipeline.binding<storage_buffer>,
+ // CHECK-SAME: #hal.pipeline.binding<uniform_buffer>
+ layout0 = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<uniform_buffer>
]>
} : () -> ()
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
index 3adbce8..279511e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_folding.mlir
@@ -98,13 +98,13 @@
// -----
-// CHECK-LABEL: @fold_buffer_subspan_into_push_descriptor_set
+// CHECK-LABEL: @fold_buffer_subspan_into_dispatch
// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
-// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout,
+// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable,
// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer
-util.func public @fold_buffer_subspan_into_push_descriptor_set(
+util.func public @fold_buffer_subspan_into_dispatch(
%cmd: !hal.command_buffer,
- %layout: !hal.pipeline_layout,
+ %executable: !hal.executable,
%buffer: !hal.buffer
) {
%c0 = arith.constant 0 : index
@@ -116,20 +116,52 @@
%c262140 = arith.constant 262140 : index
%c262144 = arith.constant 262144 : index
%subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c4096, %c262144] : !hal.buffer
- // CHECK: hal.command_buffer.push_descriptor_set
+ // CHECK: hal.command_buffer.dispatch
// CHECK-SAME: bindings([
- hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer>
- layout(%layout : !hal.pipeline_layout)[%c0]
+ hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
+ target(%executable: !hal.executable)[%c0]
+ workgroups([%c1, %c1, %c1])
bindings([
// 0 + 4096:
- // CHECK-NEXT: %c0 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c8000]
- %c0 = (%subspan : !hal.buffer)[%c0, %c8000],
+ // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c8000]
+ (%subspan : !hal.buffer)[%c0, %c8000],
// 4096 + 4:
- // CHECK-NEXT: %c1 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4100, %c262140]
- %c1 = (%subspan : !hal.buffer)[%c4, %c262140],
+ // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4100, %c262140]
+ (%subspan : !hal.buffer)[%c4, %c262140],
// No change:
- // CHECK-NEXT: %c2 = (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c262144]
- %c2 = (%buffer : !hal.buffer)[%c4096, %c262144]
+ // CHECK-NEXT: (%[[BASE_BUFFER]] : !hal.buffer)[%c4096, %c262144]
+ (%buffer : !hal.buffer)[%c4096, %c262144]
])
+ flags("None")
+ util.return
+}
+
+// -----
+
+// CHECK-LABEL: @fold_buffer_subspan_into_dispatch_indirect
+// CHECK-SAME: %[[CMD:.+]]: !hal.command_buffer,
+// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable,
+// CHECK-SAME: %[[BASE_BUFFER:.+]]: !hal.buffer
+util.func public @fold_buffer_subspan_into_dispatch_indirect(
+ %cmd: !hal.command_buffer,
+ %executable: !hal.executable,
+ %buffer: !hal.buffer
+ ) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c4 = arith.constant 4 : index
+ %c4096 = arith.constant 4096 : index
+ %c262144 = arith.constant 262144 : index
+ %subspan = hal.buffer.subspan<%buffer : !hal.buffer>[%c4096, %c262144] : !hal.buffer
+ // CHECK: hal.command_buffer.dispatch.indirect
+ hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer>
+ target(%executable: !hal.executable)[%c0]
+ // 4096 + 4:
+ // CHECK-SAME: workgroups(%[[BASE_BUFFER]] : !hal.buffer)[%c4100]
+ workgroups(%subspan : !hal.buffer)[%c4]
+ bindings([
+ (%buffer : !hal.buffer)[%c0, %c1]
+ ])
+ flags("None")
util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
index 77d56e3..2e14141 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/command_buffer_ops.mlir
@@ -213,44 +213,11 @@
// -----
-// CHECK-LABEL: @command_buffer_push_descriptor_set
-// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
-// CHECK-SAME: %[[LAYOUT:.+]]: !hal.pipeline_layout,
-// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
-// CHECK-SAME: %[[SLOT:.+]]: index)
-util.func public @command_buffer_push_descriptor_set(
- %cmd: !hal.command_buffer,
- %layout: !hal.pipeline_layout,
- %buffer: !hal.buffer,
- %slot: index) {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c4 = arith.constant 4 : index
- %c4096 = arith.constant 4096 : index
- %c8000 = arith.constant 8000 : index
- // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer>
- hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer>
- // CHECK-SAME: layout(%[[LAYOUT]] : !hal.pipeline_layout)[%c1]
- layout(%layout : !hal.pipeline_layout)[%c1]
- // CHECK-SAME: bindings([
- bindings([
- // CHECK-NEXT: %c0 = (%[[BUFFER]] : !hal.buffer)[%c4096, %c8000]
- %c0 = (%buffer : !hal.buffer)[%c4096, %c8000],
- // CHECK-NEXT: %c1 = (%[[SLOT]] : index)[%c4, %c4096]
- %c1 = (%slot : index)[%c4, %c4096]
- ])
- util.return
-}
-
-// -----
-
hal.executable @ex {
hal.executable.variant @backend target(<"backend", "format">) {
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
@@ -258,18 +225,34 @@
// CHECK-LABEL: @command_buffer_dispatch
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index,
-// CHECK-SAME: %[[X:[a-z0-9]+]]: index, %[[Y:[a-z0-9]+]]: index, %[[Z:[a-z0-9]+]]: index)
+// CHECK-SAME: %[[X:[a-z0-9]+]]: index, %[[Y:[a-z0-9]+]]: index, %[[Z:[a-z0-9]+]]: index,
+// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
+// CHECK-SAME: %[[SLOT:.+]]: index)
util.func public @command_buffer_dispatch(
%cmd: !hal.command_buffer,
%executable: !hal.executable, %ordinal: index,
- %x: index, %y: index, %z: index) {
- // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer>
- // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]]
- // CHECK-SAME: workgroups([%[[X]], %[[Y]], %[[Z]]])
- // CHECK-SAME: flags("None")
+ %x: index, %y: index, %z: index,
+ %buffer: !hal.buffer,
+ %slot: index) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c4 = arith.constant 4 : index
+ %c4096 = arith.constant 4096 : index
+ %c8000 = arith.constant 8000 : index
+ // CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer>
hal.command_buffer.dispatch<%cmd : !hal.command_buffer>
+ // CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]]
target(%executable: !hal.executable)[%ordinal]
+ // CHECK-SAME: workgroups([%[[X]], %[[Y]], %[[Z]]])
workgroups([%x, %y, %z])
+ // CHECK-SAME: bindings([
+ bindings([
+ // CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c4096, %c8000]
+ (%buffer : !hal.buffer)[%c4096, %c8000],
+ // CHECK-NEXT: (%[[SLOT]] : index)[%c4, %c4096]
+ (%slot : index)[%c4, %c4096]
+ ])
+ // CHECK-NEXT: flags("None")
flags("None")
util.return
}
@@ -278,30 +261,33 @@
hal.executable @ex {
hal.executable.variant @backend target(<"backend", "format">) {
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
// CHECK-LABEL: @command_buffer_dispatch_indirect
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
-// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:.+]]: index,
-// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, %[[OFFSET:.+]]: index)
+// CHECK-SAME: %[[EXECUTABLE:.+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index,
+// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer, %[[OFFSET:.+]]: index, %[[LENGTH:.+]]: index)
util.func public @command_buffer_dispatch_indirect(
%cmd: !hal.command_buffer,
%executable: !hal.executable, %ordinal: index,
- %buffer: !hal.buffer, %offset: index) {
+ %buffer: !hal.buffer, %offset: index, %length: index) {
// CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]]
// CHECK-SAME: workgroups(%[[BUFFER]] : !hal.buffer)[%[[OFFSET]]]
- // CHECK-SAME: flags("None")
+ // CHECK-SAME: bindings([
+ // CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%[[OFFSET]], %[[LENGTH]]]
+ // CHECK-NEXT: ]) flags("None")
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer>
target(%executable: !hal.executable)[%ordinal]
workgroups(%buffer : !hal.buffer)[%offset]
+ bindings([
+ (%buffer : !hal.buffer)[%offset, %length]
+ ])
flags("None")
util.return
}
@@ -310,11 +296,9 @@
hal.executable @ex {
hal.executable.variant @backend target(<"backend", "format">) {
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
@@ -322,18 +306,23 @@
// CHECK-LABEL: @command_buffer_dispatch_indirect_indirect
// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer,
// CHECK-SAME: %[[EXECUTABLE:[a-z0-9]+]]: !hal.executable, %[[ORDINAL:[a-z0-9]+]]: index,
-// CHECK-SAME: %[[BUFFER_SLOT:[a-z0-9]+]]: index, %[[OFFSET:[a-z0-9]+]]: index)
+// CHECK-SAME: %[[BUFFER_SLOT:[a-z0-9]+]]: index, %[[OFFSET:[a-z0-9]+]]: index, %[[LENGTH:[a-z0-9]+]]: index)
util.func public @command_buffer_dispatch_indirect_indirect(
%cmd: !hal.command_buffer,
%executable: !hal.executable, %ordinal: index,
- %buffer_slot: index, %offset: index) {
+ %buffer_slot: index, %offset: index, %length: index) {
// CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%[[EXECUTABLE]] : !hal.executable)[%[[ORDINAL]]
// CHECK-SAME: workgroups(%[[BUFFER_SLOT]] : index)[%[[OFFSET]]]
- // CHECK-SAME: flags("None")
+ // CHECK-SAME: bindings([
+ // CHECK-NEXT: (%[[BUFFER_SLOT]] : index)[%[[OFFSET]], %[[LENGTH]]]
+ // CHECK-NEXT: ]) flags("None")
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer>
target(%executable: !hal.executable)[%ordinal]
workgroups(%buffer_slot : index)[%offset]
+ bindings([
+ (%buffer_slot : index)[%offset, %length]
+ ])
flags("None")
util.return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir
deleted file mode 100644
index 86180ac..0000000
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/descriptor_set_ops.mlir
+++ /dev/null
@@ -1,20 +0,0 @@
-// RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
-
-// CHECK-LABEL: @descriptor_set_layout_create
-// CHECK-SAME: (%[[DEVICE:.+]]: !hal.device)
-util.func public @descriptor_set_layout_create(%device: !hal.device) {
- // CHECK: = hal.descriptor_set_layout.create
- // CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: flags("None")
- // CHECK-SAME: bindings([
- // CHECK-SAME: #hal.descriptor_set.binding<0, storage_buffer>,
- // CHECK-SAME: #hal.descriptor_set.binding<1, storage_buffer>
- // CHECK-SAME: ]) : !hal.descriptor_set_layout
- %0 = hal.descriptor_set_layout.create device(%device : !hal.device)
- flags("None")
- bindings([
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]) : !hal.descriptor_set_layout
- util.return
-}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
index 5d9874c..3e69574 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
@@ -13,11 +13,9 @@
]) {
// CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes {
// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index]
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
workgroup_size = [4 : index, 1 : index, 1 : index]
}
@@ -42,11 +40,9 @@
// CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes {
// CHECK-SAME: subgroup_size = 64 : index
// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index]
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
subgroup_size = 64 : index,
workgroup_size = [4 : index, 1 : index, 1 : index]
@@ -83,11 +79,9 @@
// CHECK-DAG: hal.executable.export public @entry0 ordinal(0) layout(#pipeline_layout) attributes {
// CHECK-SAME: subgroup_size = 64 : index
// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index]
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
subgroup_size = 64 : index,
workgroup_size = [4 : index, 1 : index, 1 : index]
@@ -141,26 +135,6 @@
// -----
// CHECK-LABEL: @executable_create
-// CHECK-SAME: %[[DEVICE:.+]]: !hal.device,
-// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.pipeline_layout,
-// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.pipeline_layout
-util.func public @executable_create(
- %device: !hal.device,
- %layout0: !hal.pipeline_layout,
- %layout1: !hal.pipeline_layout) {
- // CHECK: = hal.executable.create
- // CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: target(@exe::@binary1)
- // CHECK-SAME: layouts([%[[LAYOUT0]], %[[LAYOUT1]]]) : !hal.executable
- %0 = hal.executable.create device(%device : !hal.device)
- target(@exe::@binary1)
- layouts([%layout0, %layout1]) : !hal.executable
- util.return
-}
-
-// -----
-
-// CHECK-LABEL: @executable_create
// CHECK-SAME: %[[DEVICE:.+]]: !hal.device
util.func public @executable_create(%device: !hal.device) {
// CHECK: = hal.executable.create
@@ -173,36 +147,14 @@
// -----
-// CHECK-LABEL: @pipeline_layout_create
-// CHECK-SAME: %[[DEVICE:.+]]: !hal.device,
-// CHECK-SAME: %[[LAYOUT0:.+]]: !hal.descriptor_set_layout,
-// CHECK-SAME: %[[LAYOUT1:.+]]: !hal.descriptor_set_layout
-util.func public @pipeline_layout_create(
- %device: !hal.device,
- %layout0: !hal.descriptor_set_layout,
- %layout1: !hal.descriptor_set_layout) {
- // CHECK: hal.pipeline_layout.create
- // CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: push_constants(1)
- // CHECK-SAME: layouts([%[[LAYOUT0]], %[[LAYOUT1]]]) : !hal.pipeline_layout
- %0 = hal.pipeline_layout.create device(%device : !hal.device)
- push_constants(1)
- layouts([%layout0, %layout1]) : !hal.pipeline_layout
- util.return
-}
-
-// -----
-
// CHECK-LABEL: @unresolved_workload_ex
hal.executable @unresolved_workload_ex {
// CHECK: hal.executable.variant public @backend
hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
// CHECK: hal.executable.export public @entry0
- hal.executable.export public @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export public @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>) {
^bb0(%device: !hal.device, %arg0: index):
hal.return %arg0, %arg0, %arg0 : index, index, index
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir
index 7800b2a..b87c6bc 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/interface_ops.mlir
@@ -13,8 +13,8 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [<0, storage_buffer>]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: @interface_io_constant
@@ -26,9 +26,11 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [<0, storage_buffer, ReadOnly>]>,
- <1, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-LABEL: @interface_io_subspan
@@ -36,26 +38,26 @@
func.func @interface_io_subspan(%dim0: index, %dim2: index) {
%c8 = arith.constant 8 : index
- // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%[[DIM0]], %[[DIM2]]}
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%dim0, %dim2}
+ // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%[[DIM0]], %[[DIM2]]}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%dim0, %dim2}
- // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) alignment(16) : memref<16xi8>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(1) binding(2) alignment(16) : memref<16xi8>
+ // CHECK: = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(16) : memref<16xi8>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(16) : memref<16xi8>
return
}
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- <0, bindings = [<0, storage_buffer, ReadOnly>]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>
]>
func.func @interface_io_subspan_wrong_dynamic_dim(%dim: index) {
%c8 = arith.constant 8 : index
// expected-error @+1{{result type 'memref<?x4x?x16xi8>' has 2 dynamic dimensions but 1 associated dimension SSA values}}
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%dim}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) offset(%c8) : memref<?x4x?x16xi8>{%dim}
return
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
index 5dd1ea7..59145c5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/test/tensor_ops.mlir
@@ -80,19 +80,11 @@
hal.return %x_capture, %y_capture, %z : index, index, index
}
// Must match the external definition.
- // CHECK: layout(<push_constants = 1, sets =
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ // CHECK: layout(<constants = 1, bindings =
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>)
- // Optional, automatically inferred if omitted.
- // CHECK: bindings([#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>])
- bindings([
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>
- ])
// Can have object references for multiple targets or configurations.
// CHECK: objects({
objects({
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
index eedb427..decde18 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/ConvertToHAL.cpp
@@ -91,15 +91,6 @@
// Cleanup conversion attributes used for spooky action at a distance.
moduleOp->removeAttr("stream.affinity.default");
- for (auto executableOp : moduleOp.getOps<IREE::HAL::ExecutableOp>()) {
- for (auto variantOp :
- executableOp.getOps<IREE::HAL::ExecutableVariantOp>()) {
- for (auto exportOp :
- variantOp.getOps<IREE::HAL::ExecutableExportOp>()) {
- exportOp->removeAttr("hal.interface.bindings");
- }
- }
- }
}
};
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
index c5cc1c0..9d7cc63 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/DumpExecutableBenchmarks.cpp
@@ -41,7 +41,6 @@
using Vec3 = std::tuple<unsigned, unsigned, unsigned>;
struct Binding {
- unsigned set = 0;
unsigned binding = 0;
int64_t size = 0;
};
@@ -119,15 +118,9 @@
// Work around needing a mutable key for the set; C++ was a mistake.
dispatchOp.forEachEntryPointAttr([&](SymbolRefAttr entryPointAttr) {
- auto exportOp =
- symbolTable.lookupNearestSymbolFrom<IREE::HAL::ExecutableExportOp>(
- dispatchOp, entryPointAttr);
- auto bindingAttrs = IREE::HAL::getInterfaceBindingAttrs(
- exportOp, dispatchOp.getResources().size());
-
SmallVector<Binding> bindings;
- for (auto [bindingAttr, resourceLength] :
- llvm::zip_equal(bindingAttrs, dispatchOp.getResourceLengths())) {
+ for (auto [i, resourceLength] :
+ llvm::enumerate(dispatchOp.getResourceLengths())) {
APInt resourceLengthInt;
if (!matchPattern(resourceLength,
m_ConstantInt(&resourceLengthInt))) {
@@ -136,9 +129,7 @@
<< "` (non-constant resource length)\n";);
return;
}
- bindings.push_back({(unsigned)bindingAttr.getSet(),
- (unsigned)bindingAttr.getBinding(),
- resourceLengthInt.getSExtValue()});
+ bindings.push_back({(unsigned)i, resourceLengthInt.getSExtValue()});
}
auto &dispatchParamsSet = map[entryPointAttr];
@@ -300,7 +291,7 @@
// Constant values.
auto layoutAttr = exportOp.getLayoutAttr();
SmallVector<Value> constantValues;
- if (int64_t pushConstantCount = layoutAttr.getPushConstants()) {
+ if (int64_t pushConstantCount = layoutAttr.getConstants()) {
constantValues.reserve(pushConstantCount);
for (int64_t i = 0; i < pushConstantCount; ++i) {
constantValues.push_back(funcBuilder.create<arith::ConstantOp>(
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
index 37d9603..9f3bee7 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/MaterializeInterfaces.cpp
@@ -263,19 +263,14 @@
makePipelineLayoutAttr(const PipelineLayout &pipelineLayout,
IREE::HAL::ExecutableTargetAttr targetAttr,
OpBuilder &builder) {
- SmallVector<IREE::HAL::DescriptorSetLayoutAttr> setLayoutAttrs;
- for (const auto &setLayout : pipelineLayout.setLayouts) {
- SmallVector<IREE::HAL::DescriptorSetBindingAttr> bindingAttrs;
- for (const auto &binding : setLayout.bindings) {
- bindingAttrs.push_back(IREE::HAL::DescriptorSetBindingAttr::get(
- builder.getContext(), binding.ordinal, binding.type, binding.flags));
- }
- setLayoutAttrs.push_back(IREE::HAL::DescriptorSetLayoutAttr::get(
- builder.getContext(), setLayout.ordinal, bindingAttrs,
- setLayout.flags));
+ SmallVector<IREE::HAL::PipelineBindingAttr> bindingAttrs;
+ for (const auto &binding : pipelineLayout.bindings) {
+ bindingAttrs.push_back(IREE::HAL::PipelineBindingAttr::get(
+ builder.getContext(), binding.type, binding.flags));
}
- return IREE::HAL::PipelineLayoutAttr::get(
- builder.getContext(), pipelineLayout.pushConstantCount, setLayoutAttrs);
+ return IREE::HAL::PipelineLayoutAttr::get(builder.getContext(), bindingAttrs,
+ pipelineLayout.constantCount,
+ pipelineLayout.flags);
}
// Converts the usage of the given primitive |arg| to interface methods.
@@ -297,8 +292,8 @@
static void
convertBindingUsage(mlir::FunctionOpInterface sourceFuncOp, BlockArgument arg,
IREE::HAL::PipelineLayoutAttr pipelineLayoutAttr,
- IREE::HAL::DescriptorSetLayoutAttr setLayoutAttr,
- IREE::HAL::DescriptorSetBindingAttr bindingAttr) {
+ int64_t bindingOrdinal,
+ IREE::HAL::PipelineBindingAttr bindingAttr) {
if (arg.use_empty())
return; // no-op
for (auto &use : llvm::make_early_inc_range(arg.getUses())) {
@@ -309,8 +304,7 @@
arg.getArgNumber(), "stream.alignment");
auto newOp = builder.create<IREE::HAL::InterfaceBindingSubspanOp>(
oldOp.getLoc(), oldOp.getType(), pipelineLayoutAttr,
- APInt(64, setLayoutAttr.getOrdinal()),
- APInt(64, bindingAttr.getOrdinal()), oldOp.getByteOffset(),
+ APInt(64, bindingOrdinal), oldOp.getByteOffset(),
oldOp.getDynamicDims(), alignmentAttr, bindingAttr.getFlags());
oldOp.replaceAllUsesWith(newOp.getResult());
oldOp.erase();
@@ -347,13 +341,10 @@
if (!llvm::isa<IREE::Stream::BindingType>(arg.getType())) {
continue; // unhandled arg type (primitive/etc)
}
- auto setBinding = resourceMap[resourceIdx++];
- auto setLayoutAttr = layoutAttr.getSetLayout(setBinding.first);
- assert(setLayoutAttr && "layout must be consistent");
- auto bindingAttr = setLayoutAttr.getBinding(setBinding.second);
+ auto binding = resourceMap[resourceIdx++];
+ auto bindingAttr = layoutAttr.getBinding(binding);
assert(bindingAttr && "layout must be consistent");
- convertBindingUsage(sourceFuncOp, arg, layoutAttr, setLayoutAttr,
- bindingAttr);
+ convertBindingUsage(sourceFuncOp, arg, layoutAttr, binding, bindingAttr);
}
// Remove all arguments now that we've turned them into lookup ops.
@@ -396,7 +387,7 @@
exportOp->getAttrOfType<IREE::HAL::PipelineLayoutAttr>(
"hal.interface.layout");
const auto &pipelineLayout = layoutAnalysis.getPipelineLayout(exportOp);
- const PipelineResourceMap &resourceMap = pipelineLayout.resourceMap;
+ const auto &resourceMap = pipelineLayout.resourceMap;
// Clone the updated function declaration into each variant.
ExportExpansions exportExpansions;
@@ -444,17 +435,6 @@
exportExpansions[oldRefAttr].push_back(
std::make_pair(newRefAttr, variantOp.getTargetAttr()));
- // Annotate the export with the a mapping of the resources to the
- // interface bindings. This is used during conversion.
- SmallVector<Attribute> bindingAttrs;
- for (auto setBinding : resourceMap) {
- bindingAttrs.push_back(IREE::HAL::InterfaceBindingAttr::get(
- newExportOp.getContext(), setBinding.first, setBinding.second));
- }
- newExportOp->setAttr(
- "hal.interface.bindings",
- ArrayAttr::get(newExportOp.getContext(), bindingAttrs));
-
// Clone the workgroup count calculation function.
if (!exportOp.getWorkgroupCount().empty()) {
mlir::IRMapping mapper;
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir
index e7838d6..c5e799a 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/capture_executable_sources.mlir
@@ -1,12 +1,10 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-hal-capture-executable-sources{stage=configured})' %s | FileCheck %s
#executable_target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK-DAG: #[[EX0_VARIANT0_LOC:.+]] = loc("module_ex0_variant0.configured.mlir"
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
index 1de9b90..6d443f5 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/convert_to_hal.mlir
@@ -7,35 +7,20 @@
#executable_target_embedded_elf_aarch64 = #hal.executable.target<"llvm-cpu", "embedded-elf-aarch64">
#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
-// CHECK: #[[PIPELINE_LAYOUT_ATTR_0:.+]] = #hal.pipeline.layout
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- // CHECK-SAME: <0, storage_buffer>
- #hal.descriptor_set.binding<0, storage_buffer>,
- // CHECK-SAME: <1, storage_buffer>
- #hal.descriptor_set.binding<1, storage_buffer>,
- // CHECK-SAME: <2, storage_buffer>
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
-]>
-// CHECK: #[[PIPELINE_LAYOUT_ATTR_1:.+]] = #hal.pipeline.layout
-#pipeline_layout_1 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- // CHECK-SAME: <4, storage_buffer>
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- // CHECK-SAME: <5, storage_buffer>
- #hal.descriptor_set.binding<5, storage_buffer>,
- // CHECK-SAME: <6, storage_buffer>
- #hal.descriptor_set.binding<6, storage_buffer>
- ]>
+// CHECK: #[[PIPELINE_LAYOUT_ATTR:.+]] = #hal.pipeline.layout
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ // CHECK-SAME: #hal.pipeline.binding<storage_buffer>
+ #hal.pipeline.binding<storage_buffer>,
+ // CHECK-SAME: #hal.pipeline.binding<storage_buffer>
+ #hal.pipeline.binding<storage_buffer>,
+ // CHECK-SAME: #hal.pipeline.binding<storage_buffer>
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK: hal.executable private @ex
hal.executable private @ex {
hal.executable.variant public @embedded_elf_aarch64 target(#executable_target_embedded_elf_aarch64) {
- hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout_0) {
+ hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) {
^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors
%c1 = arith.constant 1 : index
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg0]
@@ -46,15 +31,7 @@
}
}
hal.executable.variant public @embedded_elf_x86_64 target(#executable_target_embedded_elf_x86_64) {
- hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout_1) attributes {
- // Override the bindings. The other variant uses the default ones.
- // CHECK-NOT: hal.interface.bindings
- hal.interface.bindings = [
- #hal.interface.binding<0, 4>,
- #hal.interface.binding<1, 5>,
- #hal.interface.binding<1, 6>
- ]
- } {
+ hal.executable.export public @dispatch ordinal(0) layout(#pipeline_layout) {
^bb0(%device: !hal.device, %arg0: index, %arg1: index, %arg2: index): // no predecessors
%c1 = arith.constant 1 : index
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg0]
@@ -108,9 +85,8 @@
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: mode("OneShot|AllowInlineExecution")
// CHECK-SAME: categories("Transfer|Dispatch")
- %timepoint = stream.cmd.execute
+ %timepoint = stream.cmd.execute once
with(%arg0_resource as %arg0_capture: !stream.resource<external>{%c16},
%arg1_resource as %arg1_capture: !stream.resource<external>{%c16},
%result_resource as %result_capture: !stream.resource<external>{%c16}) {
@@ -121,42 +97,28 @@
// CHECK-DAG: %[[SWITCH0:.+]] = arith.select %[[FORMAT_AARCH64]], %c0, %[[SWITCH1]]
// CHECK: scf.index_switch %[[SWITCH0]]
// CHECK: case 0 {
- // CHECK: %[[PIPELINE_LAYOUT_0:.+]] = hal.pipeline_layout.lookup
- // CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: layout(#[[PIPELINE_LAYOUT_ATTR_0]]) : !hal.pipeline_layout
- // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer>
- // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_0]] : !hal.pipeline_layout)[%c0]
- // CHECK-SAME: bindings([
- // CHECK: %c0 = (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16],
- // CHECK: %c1 = (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16],
- // CHECK: %c2 = (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16]
- // CHECK: ])
// CHECK-DAG: %[[EXECUTABLE_0:.+]] = hal.executable.lookup device(%[[DEVICE]] : !hal.device) executable(@ex) : !hal.executable
// CHECK-DAG: %[[ORDINAL_0:.+]] = hal.executable.export.ordinal target(@ex::@embedded_elf_aarch64::@dispatch) : index
// CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%[[EXECUTABLE_0]] : !hal.executable)[%[[ORDINAL_0]]]
// CHECK-SAME: workgroups([%c1, %c1, %c1])
+ // CHECK-SAME: bindings([
+ // CHECK-NEXT: (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16],
+ // CHECK-NEXT: (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16],
+ // CHECK-NEXT: (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16]
+ // CHECK-NEXT: ])
// CHECK: scf.yield
// CHECK: }
// CHECK: case 1 {
- // CHECK: %[[PIPELINE_LAYOUT_1:.+]] = hal.pipeline_layout.lookup
- // CHECK-SAME: device(%[[DEVICE]] : !hal.device)
- // CHECK-SAME: layout(#[[PIPELINE_LAYOUT_ATTR_1]]) : !hal.pipeline_layout
- // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer>
- // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_1]] : !hal.pipeline_layout)[%c0]
- // CHECK-SAME: bindings([
- // CHECK: %c4 = (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16]
- // CHECK: ])
- // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer>
- // CHECK-SAME: layout(%[[PIPELINE_LAYOUT_1]] : !hal.pipeline_layout)[%c1]
- // CHECK-SAME: bindings([
- // CHECK: %c5 = (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16],
- // CHECK: %c6 = (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16]
- // CHECK: ])
// CHECK-DAG: %[[EXECUTABLE_1:.+]] = hal.executable.lookup device(%[[DEVICE]] : !hal.device) executable(@ex) : !hal.executable
// CHECK-DAG: %[[ORDINAL_1:.+]] = hal.executable.export.ordinal target(@ex::@embedded_elf_x86_64::@dispatch) : index
// CHECK: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%[[EXECUTABLE_1]] : !hal.executable)[%[[ORDINAL_1]]]
+ // CHECK-SAME: bindings([
+ // CHECK-NEXT: (%[[ARG0_BUFFER]] : !hal.buffer)[%c0, %c16],
+ // CHECK-NEXT: (%[[ARG1_BUFFER]] : !hal.buffer)[%c0, %c16],
+ // CHECK-NEXT: (%[[RESULT_BUFFER]] : !hal.buffer)[%c0, %c16]
+ // CHECK-NEXT: ])
// CHECK: scf.yield
// CHECK: }
stream.cmd.dispatch {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
index 484adee..d91f19e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_benchmarks.mlir
@@ -10,18 +10,14 @@
#executable_target_embedded_elf_x86_64
]> : !hal.device
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout_0 = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
-#pipeline_layout_1 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout_1 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// Executable should be dumped:
@@ -74,14 +70,7 @@
// Create command buffer:
// CHECK: %[[CMD:.+]] = hal.command_buffer.create
-// Setup dispatch constants and bindings:
-// CHECK: hal.command_buffer.push_constants<%[[CMD]] : !hal.command_buffer> layout(%{{.+}} : !hal.pipeline_layout) offset(0) values([%c100_i32, %c200_i32]) : i32, i32
// CHECK: %[[BUFFER:.+]] = util.global.load @ex0_embedded_elf_x86_64_dispatch0_512_buffer
-// CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> layout(%{{.+}} : !hal.pipeline_layout)[%c0] bindings([
-// CHECK-NEXT: %c0 = (%[[BUFFER]] : !hal.buffer)[%c0, %c32],
-// CHECK-NEXT: %c1 = (%[[BUFFER]] : !hal.buffer)[%c256, %c32],
-// CHECK-NEXT: %c2 = (%[[BUFFER]] : !hal.buffer)[%c512, %c32]
-// CHECK-NEXT: ])
// Calculate the workgroup count, which we leave symbolic until after
// translation:
@@ -96,7 +85,15 @@
// Dispatch up to batch size dispatches:
// CHECK: scf.for %{{.+}} = %c0 to %[[BATCH_SIZE]] step %c1 {
-// CHECK-NEXT: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer> target(%[[EXECUTABLE:.+]] : !hal.executable)[%[[ORDINAL_0]]] workgroups([%[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]])
+// CHECK-NEXT: hal.command_buffer.dispatch<%[[CMD]] : !hal.command_buffer>
+// CHECK-SAME: target(%[[EXECUTABLE:.+]] : !hal.executable)[%[[ORDINAL_0]]]
+// CHECK-SAME: workgroups([%[[WORKGROUP_X]], %[[WORKGROUP_Y]], %[[WORKGROUP_Z]]])
+// CHECK-SAME: constants([%c100_i32, %c200_i32])
+// CHECK-SAME: bindings([
+// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c0, %c32],
+// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c256, %c32],
+// CHECK-NEXT: (%[[BUFFER]] : !hal.buffer)[%c512, %c32]
+// CHECK-NEXT: ])
// CHECK-NEXT: hal.command_buffer.execution_barrier
// CHECK-NEXT: }
@@ -181,10 +178,8 @@
#executable_target_embedded_elf_x86_64
]> : !hal.device
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @ex_0 {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir
index 5de1c9c..d22f7a1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/dump_executable_sources.mlir
@@ -4,12 +4,10 @@
// but this is much easier to test with lit.
#executable_target_embedded_elf_x86_64 = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64">
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK: hal.executable public @ex0
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
index 861d4e0..1b9070d 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/elide_redundant_commands.mlir
@@ -3,127 +3,21 @@
// Tests that redundant barriers are elided but barriers guarding ops are not.
// CHECK-LABEL: @elideRedundantBarriers
-// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout)
-util.func public @elideRedundantBarriers(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
+// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[BUFFER:.+]]: !hal.buffer)
+util.func public @elideRedundantBarriers(%cmd: !hal.command_buffer, %buffer: !hal.buffer) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %c42_i32 = arith.constant 42 : i32
// CHECK: hal.command_buffer.execution_barrier
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
// CHECK-NOT: hal.command_buffer.execution_barrier
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
- // CHECK: hal.command_buffer.push_constants
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout) offset(0) values([%c42_i32]) : i32
+ // CHECK: hal.command_buffer.copy_buffer
+ hal.command_buffer.copy_buffer<%cmd : !hal.command_buffer>
+ source(%buffer : !hal.buffer)[%c0]
+ target(%buffer : !hal.buffer)[%c0]
+ length(%c1)
// CHECK: hal.command_buffer.execution_barrier
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
// CHECK: util.return
util.return
}
-
-// -----
-
-// CHECK-LABEL: @elidePushConstants
-util.func public @elidePushConstants(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
- // CHECK-DAG: %[[C0:.+]] = arith.constant 0
- %c0 = arith.constant 0 : i32
- // CHECK-DAG: %[[C1:.+]] = arith.constant 1
- %c1 = arith.constant 1 : i32
- // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]], %[[C1]]])
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0, %c1]) : i32, i32
- // CHECK-NOT: hal.command_buffer.push_constants
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0, %c1]) : i32, i32
- // CHECK-NOT: hal.command_buffer.push_constants
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0, %c1]) : i32, i32
- // CHECK: util.return
- util.return
-}
-
-// -----
-
-// CHECK-LABEL: @elidePushConstantsPrefix
-util.func public @elidePushConstantsPrefix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
- // CHECK-DAG: %[[C0:.+]] = arith.constant 0
- %c0 = arith.constant 0 : i32
- // CHECK-DAG: %[[C1:.+]] = arith.constant 1
- %c1 = arith.constant 1 : i32
- // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]]])
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0]) : i32
- // CHECK: hal.command_buffer.push_constants{{.+}} offset(1) values([%[[C1]]])
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0, %c1]) : i32, i32
- // CHECK-NOT: hal.command_buffer.push_constants
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(1)
- values([%c1]) : i32
- // CHECK: util.return
- util.return
-}
-
-// -----
-
-// CHECK-LABEL: @elidePushConstantsSuffix
-util.func public @elidePushConstantsSuffix(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout) {
- // CHECK-DAG: %[[C0:.+]] = arith.constant 0
- %c0 = arith.constant 0 : i32
- // CHECK-DAG: %[[C1:.+]] = arith.constant 1
- %c1 = arith.constant 1 : i32
- // CHECK-DAG: %[[C2:.+]] = arith.constant 2
- %c2 = arith.constant 2 : i32
- // CHECK: hal.command_buffer.push_constants{{.+}} offset(0) values([%[[C0]], %[[C1]], %[[C2]]])
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(0)
- values([%c0, %c1, %c2]) : i32, i32, i32
- // CHECK: hal.command_buffer.push_constants{{.+}} offset(1) values([%[[C0]]])
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
- layout(%pipeline_layout : !hal.pipeline_layout)
- offset(1)
- values([%c0, %c2]) : i32, i32
- // CHECK: util.return
- util.return
-}
-
-// -----
-
-// NOTE: today we just check for complete equality.
-
-// CHECK-LABEL: @elidePushDescriptorSet
-// CHECK-SAME: (%[[CMD:.+]]: !hal.command_buffer, %[[LAYOUT:.+]]: !hal.pipeline_layout, %[[BUFFER0:.+]]: !hal.buffer, %[[BUFFER1:.+]]: !hal.buffer)
-util.func public @elidePushDescriptorSet(%cmd: !hal.command_buffer, %pipeline_layout: !hal.pipeline_layout, %buffer0: !hal.buffer, %buffer1: !hal.buffer) {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- // CHECK-DAG: %[[SIZE0:.+]] = arith.constant 100
- %size0 = arith.constant 100 : index
- // CHECK-DAG: %[[SIZE1:.+]] = arith.constant 101
- %size1 = arith.constant 101 : index
- // CHECK: hal.command_buffer.push_descriptor_set<%[[CMD]] : !hal.command_buffer> layout(%[[LAYOUT]] : !hal.pipeline_layout)[%c0] bindings([
- // CHECK-NEXT: %c0 = (%[[BUFFER0]] : !hal.buffer)[%c0, %[[SIZE0]]],
- // CHECK-NEXT: %c1 = (%[[BUFFER1]] : !hal.buffer)[%c0, %[[SIZE1]]]
- // CHECK-NEXT: ])
- hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
- %c0 = (%buffer0 : !hal.buffer)[%c0, %size0],
- %c1 = (%buffer1 : !hal.buffer)[%c0, %size1]
- ])
- // CHECK-NOT: hal.command_buffer.push_descriptor_set
- hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
- %c0 = (%buffer0 : !hal.buffer)[%c0, %size0],
- %c1 = (%buffer1 : !hal.buffer)[%c0, %size1]
- ])
- // CHECK: util.return
- util.return
-}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
index 5623e7f..2fb1a66 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
@@ -10,17 +10,15 @@
]> : !hal.device
// CHECK: #pipeline_layout = #hal.pipeline.layout<
-// CHECK-SAME: push_constants = 1
-// CHECK-SAME: sets = [
-// CHECK-SAME: <0, bindings = [
-// CHECK-SAME: <0, storage_buffer, "ReadOnly|Indirect">
-// CHECK-SAME: <1, storage_buffer, "ReadOnly|Indirect">
-// CHECK-SAME: <2, storage_buffer, Indirect>
+// CHECK-SAME: constants = 1
+// CHECK-SAME: bindings = [
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer, Indirect>
// CHECK: hal.executable private @ex
// CHECK: hal.executable.variant public @arm_64 target(#executable_target_arm_64
// CHECK: hal.executable.export public @entry ordinal(0) layout(#pipeline_layout)
-// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]
// CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index):
// CHECK-NEXT: hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index
// CHECK-NEXT: }
@@ -29,7 +27,6 @@
// CHECK-NEXT: func.func @entry
// CHECK: hal.executable.variant public @x86_64 target(#executable_target_x86_64
// CHECK: hal.executable.export public @entry ordinal(0) layout(#pipeline_layout)
-// CHECK-SAME: hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]
// CHECK-NEXT: ^bb0(%[[DEVICE:.+]]: !hal.device, %[[ARG0:.+]]: index, %[[ARG1:.+]]: index):
// CHECK-NEXT: hal.return %[[ARG0]], %[[ARG1]], %[[ARG0]] : index, index, index
// CHECK-NEXT: }
@@ -159,10 +156,8 @@
// CHECK: hal.executable.variant public @riscv_32
// CHECK: hal.executable.variant public @x86_64
hal.executable.source private @ex {
- hal.executable.export public @entry layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+ hal.executable.export public @entry layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>)
builtin.module {
func.func @entry() {
@@ -227,10 +222,8 @@
// CHECK: hal.executable.variant public @riscv_32
// CHECK: hal.executable.variant public @x86_64
hal.executable.source public @ex {
- hal.executable.export public @entry layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+ hal.executable.export public @entry layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>)
builtin.module {
func.func @entry() {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
index 4e562f6..4a24e78 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/materialize_resource_caches.mlir
@@ -1,51 +1,13 @@
// RUN: iree-opt --split-input-file --iree-hal-materialize-resource-caches %s | FileCheck %s
-// CHECK: util.global private @device = #hal.device.ordinal<0>
-util.global private @device = #hal.device.ordinal<0> : !hal.device
-// CHECK: util.global private @__device_pipeline_layout_0 : !hal.pipeline_layout
-// CHECK-NEXT: util.initializer {
-// CHECK-DAG: %[[DEVICE:.+]] = util.global.load @device
-// CHECK-DAG: %[[SET_LAYOUT_0:.+]] = hal.descriptor_set_layout.create
-// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
-// CHECK-SAME: flags("None")
-// CHECK-SAME: bindings([
-// CHECK-SAME: #hal.descriptor_set.binding<0, storage_buffer>,
-// CHECK-SAME: #hal.descriptor_set.binding<1, storage_buffer>
-// CHECK-SAME: ]) : !hal.descriptor_set_layout
-// CHECK-NEXT: %[[PIPELINE_LAYOUT:.+]] = hal.pipeline_layout.create
-// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
-// CHECK-SAME: push_constants(1)
-// CHECK-SAME: layouts([%[[SET_LAYOUT_0]]]) : !hal.pipeline_layout
-// CHECK-NEXT: util.global.store %[[PIPELINE_LAYOUT]], @__device_pipeline_layout_0 : !hal.pipeline_layout
-
-// CHECK-LABEL: @exeLayoutLookup
-util.func public @exeLayoutLookup() -> !hal.pipeline_layout {
- %device = util.global.load @device : !hal.device
- // CHECK: %[[LOADED_LAYOUT:.+]] = util.global.load @__device_pipeline_layout_0 : !hal.pipeline_layout
- %0 = hal.pipeline_layout.lookup device(%device : !hal.device) layout(#hal.pipeline.layout<push_constants = 1, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
- ]>) : !hal.pipeline_layout
- // CHECK-NEXT: util.return %[[LOADED_LAYOUT]]
- util.return %0 : !hal.pipeline_layout
-}
-
-// -----
-
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout_0 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
-#pipeline_layout_1 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout_1 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK: hal.executable private @exe
@@ -83,22 +45,12 @@
util.global private @device = #hal.device.ordinal<0> : !hal.device
// Cached resources for the device.
-// CHECK: util.global private @__device_pipeline_layout_0 : !hal.pipeline_layout
-// CHECK: util.global private @__device_pipeline_layout_1 : !hal.pipeline_layout
// CHECK: util.global private @__device_executable_0_exe : !hal.executable
// Device initializer for all resources used with the device:
// CHECK: util.initializer
// CHECK: %[[DEVICE:.+]] = util.global.load @device
-// Create pipeline layouts (and required descriptor set layouts):
-// CHECK: %[[SET_LAYOUT_0:.+]] = hal.descriptor_set_layout.create device(%[[DEVICE]] : !hal.device)
-// CHECK: %[[SET_LAYOUT_1:.+]] = hal.descriptor_set_layout.create device(%[[DEVICE]] : !hal.device)
-// CHECK: %[[PIPELINE_LAYOUT_0:.+]] = hal.pipeline_layout.create device(%[[DEVICE]] : !hal.device) push_constants(0) layouts([%[[SET_LAYOUT_0]]]) : !hal.pipeline_layout
-// CHECK: util.global.store %[[PIPELINE_LAYOUT_0]], @__device_pipeline_layout_0
-// CHECK: %[[PIPELINE_LAYOUT_1:.+]] = hal.pipeline_layout.create device(%device : !hal.device) push_constants(0) layouts([%[[SET_LAYOUT_1]]]) : !hal.pipeline_layout
-// CHECK: util.global.store %[[PIPELINE_LAYOUT_1]], @__device_pipeline_layout_1
-
// Switch on the supported formats:
// CHECK: %{{.+}}, %[[FORMAT_VMVX:.+]] = hal.device.query<%[[DEVICE]] : !hal.device> key("hal.executable.format" :: "vmvx-bytecode-fb")
// CHECK: %[[VMVX_CONDITION:.+]] = scf.execute_region -> i1 {
@@ -120,7 +72,6 @@
// CHECK: %[[EXE:.+]] = hal.executable.create
// CHECK-SAME: device(%[[DEVICE]] : !hal.device)
// CHECK-SAME: target(@exe::@vmvx)
-// CHECK-SAME: layouts([%[[PIPELINE_LAYOUT_0]], %[[PIPELINE_LAYOUT_0]], %[[PIPELINE_LAYOUT_1]]])
// CHECK-SAME: constants([%[[CONST_01]]#0, %[[CONST_01]]#1, %[[CONST_2]]])
// CHECK-SAME: : !hal.executable
@@ -172,11 +123,9 @@
%ok, %selected = hal.device.query<%device : !hal.device> key("some" :: "feature") : i1, i1
hal.return %selected : i1
}
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>)
// CHECK-NOT: hal.executable.constant.block
hal.executable.constant.block() -> (i32, i32) as ("foo", "bar") {
@@ -189,13 +138,9 @@
// CHECK: util.global private @primary_device
util.global private @primary_device = #hal.device.ordinal<0> : !hal.device
-// CHECK-NEXT: util.global private @__primary_device_pipeline_layout_0
// CHECK-NEXT: util.global private @__primary_device_executable_0_exe
// CHECK-NEXT: util.initializer
// CHECK: util.global.load @primary_device
-// CHECK: hal.descriptor_set_layout.create
-// CHECK: hal.pipeline_layout.create
-// CHECK: util.global.store {{.+}}, @__primary_device_pipeline_layout_0
// CHECK: hal.executable.create
// CHECK: util.global.store {{.+}}, @__primary_device_executable_0_exe
// CHECK: util.func private @__primary_device_executable_0_exe_constant_block_0
@@ -205,7 +150,6 @@
#hal.device.ordinal<1> : !hal.device,
#hal.device.fallback<@primary_device> : !hal.device
]> : !hal.device
-// CHECK-NEXT: util.global private @__optional_device_pipeline_layout_0
// CHECK-NEXT: util.global private @__optional_device_executable_0_exe
// CHECK-NEXT: util.initializer
// CHECK-DAG: %[[OPTIONAL_DEVICE:.+]] = util.global.load @optional_device
@@ -214,14 +158,9 @@
// CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]]
// CHECK-DAG: scf.index_switch %[[INDEX]]
// CHECK: case 0
-// CHECK: %[[PRIMARY_LAYOUT:.+]] = util.global.load @__primary_device_pipeline_layout_0
-// CHECK: util.global.store %[[PRIMARY_LAYOUT]], @__optional_device_pipeline_layout_0
// CHECK: %[[PRIMARY_EXE:.+]] = util.global.load @__primary_device_executable_0_exe
// CHECK: util.global.store %[[PRIMARY_EXE]], @__optional_device_executable_0_exe
// CHECK: default
-// CHECK: hal.descriptor_set_layout.create
-// CHECK: hal.pipeline_layout.create
-// CHECK: util.global.store {{.+}}, @__optional_device_pipeline_layout_0
// CHECK: hal.executable.create
// CHECK: util.global.store {{.+}}, @__optional_device_executable_0_exe
// CHECK: util.func private @__optional_device_executable_0_exe_constant_block_0
@@ -248,23 +187,17 @@
hal.executable private @exe {
hal.executable.variant @vmvx target(<"vmvx", "vmvx-bytecode-fb">) {
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
// CHECK-LABEL: util.global private @primary_device
util.global private @primary_device = #hal.device.ordinal<0> : !hal.device
-// CHECK-NEXT: util.global private @__primary_device_pipeline_layout_0
// CHECK-NEXT: util.global private @__primary_device_executable_0_exe
// CHECK-NEXT: util.initializer
// CHECK: util.global.load @primary_device
-// CHECK: hal.descriptor_set_layout.create
-// CHECK: hal.pipeline_layout.create
-// CHECK: util.global.store {{.+}}, @__primary_device_pipeline_layout_0
// CHECK: hal.executable.create
// CHECK: util.global.store {{.+}}, @__primary_device_executable_0_exe
@@ -273,7 +206,6 @@
#hal.device.ordinal<1> : !hal.device,
#hal.device.fallback<@primary_device> : !hal.device
]> : !hal.device
-// CHECK-NEXT: util.global private @__optional_device_0_pipeline_layout_0
// CHECK-NEXT: util.global private @__optional_device_0_executable_0_exe
// CHECK-NEXT: util.initializer
// CHECK-DAG: %[[OPTIONAL_DEVICE_0:.+]] = util.global.load @optional_device_0
@@ -281,8 +213,6 @@
// CHECK-DAG: %[[DEVICE_EQ:.+]] = util.cmp.eq %[[OPTIONAL_DEVICE_0]], %[[PRIMARY_DEVICE]]
// CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]]
// CHECK-DAG: scf.index_switch %[[INDEX]]
-// CHECK: util.global.load @__primary_device_pipeline_layout_0
-// CHECK: util.global.store {{.+}}, @__optional_device_0_pipeline_layout_0
// CHECK: util.global.load @__primary_device_executable_0_exe
// CHECK: util.global.store {{.+}}, @__optional_device_0_executable_0_exe
@@ -291,7 +221,6 @@
#hal.device.ordinal<2> : !hal.device,
#hal.device.fallback<@optional_device_0> : !hal.device
]> : !hal.device
-// CHECK-NEXT: util.global private @__optional_device_1_pipeline_layout_0
// CHECK-NEXT: util.global private @__optional_device_1_executable_0_exe
// CHECK-NEXT: util.initializer
// CHECK-DAG: %[[OPTIONAL_DEVICE_1:.+]] = util.global.load @optional_device_1
@@ -299,8 +228,6 @@
// CHECK-DAG: %[[DEVICE_EQ:.+]] = util.cmp.eq %[[OPTIONAL_DEVICE_1]], %[[OPTIONAL_DEVICE_0]]
// CHECK-DAG: %[[INDEX:.+]] = arith.select %[[DEVICE_EQ]]
// CHECK-DAG: scf.index_switch %[[INDEX]]
-// CHECK: util.global.load @__optional_device_0_pipeline_layout_0
-// CHECK: util.global.store {{.+}}, @__optional_device_1_pipeline_layout_0
// CHECK: util.global.load @__optional_device_0_executable_0_exe
// CHECK: util.global.store {{.+}}, @__optional_device_1_executable_0_exe
@@ -322,34 +249,13 @@
// could rework the pass to support only materializing what's required based on
// what resources are looked up.
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout_0 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
util.global private @device : !hal.device
-util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout
-util.initializer {
- %c0 = arith.constant 0 : index
- %device = hal.devices.get %c0 : !hal.device
- %descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) flags("None") bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>]) : !hal.descriptor_set_layout
- util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout
- util.return
-}
-
-util.global private @_pipeline_layout_0 : !hal.pipeline_layout
-util.initializer {
- %_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
- %c0 = arith.constant 0 : index
- %device = hal.devices.get %c0 : !hal.device
- %pipeline_layout = hal.pipeline_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.pipeline_layout
- util.global.store %pipeline_layout, @_pipeline_layout_0 : !hal.pipeline_layout
- util.return
-}
-
util.global private @_executable_exe : !hal.executable
util.initializer {
%c0 = arith.constant 0 : index
@@ -359,8 +265,7 @@
%variant = arith.select %format_supported, %c0, %c-1 : index
%selected = scf.index_switch %variant -> !hal.executable
case 0 {
- %_pipeline_layout_0 = util.global.load @_pipeline_layout_0 : !hal.pipeline_layout
- %exe = hal.executable.create device(%device : !hal.device) target(@exe0::@vmvx) layouts([%_pipeline_layout_0]) : !hal.executable
+ %exe = hal.executable.create device(%device : !hal.device) target(@exe0::@vmvx) : !hal.executable
scf.yield %exe : !hal.executable
}
default {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir
index 6f1f799..8dd0e55 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/outline_memoize_regions.mlir
@@ -194,6 +194,9 @@
// CHECK: hal.command_buffer.dispatch.indirect<%[[CMD]] : !hal.command_buffer>
// CHECK-SAME: target(%[[APPLY_EXECUTABLE]] : !hal.executable)
// CHECK-SAME: workgroups(%[[APPLY_BUFFER]] : !hal.buffer)
+// CHECK-SAME: bindings([
+// CHECK-NEXT: (%[[APPLY_BUFFER]] : !hal.buffer)[%c0, %c1]
+// CHECK-NEXT: ])
// CHECK: hal.command_buffer.execution_barrier
// CHECK: hal.command_buffer.finalize
// CHECK: util.return %[[CMD]]
@@ -216,6 +219,8 @@
%affinity = arith.constant 2 : i64
%executable = util.global.load immutable @executable : !hal.executable
%buffer = util.global.load immutable @buffer : !hal.buffer
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
// CHECK-NOT: hal.device.memoize
// CHECK: %[[CMD:.+]] = util.call @__memoize_command_buffer_memoize_lookup
%result = hal.device.memoize<%device : !hal.device> affinity(%affinity) -> !hal.command_buffer {
@@ -225,6 +230,9 @@
hal.command_buffer.dispatch.indirect<%cmd : !hal.command_buffer>
target(%executable : !hal.executable)[%dispatch_ordinal]
workgroups(%buffer : !hal.buffer)[%offset]
+ bindings([
+ (%buffer : !hal.buffer)[%c0, %c1]
+ ])
flags(None)
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer>
source(CommandIssue) target(CommandProcess) flags(None)
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir
index 6742b24..0f4a829 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/preprocess_executables.mlir
@@ -21,7 +21,9 @@
hal.executable private @executable_a {
// CHECK: hal.executable.variant public @variant_a
hal.executable.variant public @variant_a target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 123 : i64}>) {
- hal.executable.export public @dispatch_a ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch_a ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index):
%c1 = arith.constant 1 : index
hal.return %c1, %c1, %c1 : index, index, index
@@ -37,7 +39,9 @@
}
// CHECK: hal.executable.variant public @variant_unmodified
hal.executable.variant public @variant_unmodified target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {}>) {
- hal.executable.export public @dispatch_unmodified ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch_unmodified ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index):
%c1 = arith.constant 1 : index
hal.return %c1, %c1, %c1 : index, index, index
@@ -57,7 +61,9 @@
hal.executable private @executable_b {
// CHECK: hal.executable.variant public @variant_b
hal.executable.variant public @variant_b target(#hal.executable.target<"cuda", "cuda-nvptx-fb", {replace_i64 = 456 : i64}>) {
- hal.executable.export public @dispatch_b ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch_b ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
hal.return %c1, %c1, %c1 : index, index, index
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir
index 8d2ea70..3c711ef 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/prune_executables.mlir
@@ -5,10 +5,8 @@
// as part of this pass for consistency (after running, no unused
// executables/variants/exports exist).
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
// Should be removed as there are no uses.
@@ -57,10 +55,8 @@
// Tests that an export with no references is dropped.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @exe {
hal.executable.variant public @variant target(<"backend", "format">) {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
index a139ece..164bcbf 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/repeat_dispatches.mlir
@@ -1,24 +1,25 @@
// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(util.func(iree-hal-repeat-dispatches{count=2}))' %s | FileCheck %s
-util.global @_executable : !hal.executable
+util.global @executable : !hal.executable
// CHECK-LABEL: @duplicate_dispatches
-// CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer,
-// CHECK-SAME: %[[CMD2:.+]]: !hal.command_buffer)
-util.func public @duplicate_dispatches(%cmd1 : !hal.command_buffer, %cmd2 : !hal.command_buffer) {
- // CHECK: %[[EXE:.+]] = util.global.load @_executable
- %exe = util.global.load @_executable : !hal.executable
+// CHECK-SAME: (%[[CMD1:[a-z0-9]+]]: !hal.command_buffer,
+// CHECK-SAME: %[[CMD2:[a-z0-9]+]]: !hal.command_buffer,
+// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer)
+util.func public @duplicate_dispatches(%cmd1: !hal.command_buffer, %cmd2: !hal.command_buffer, %buffer: !hal.buffer) {
+ // CHECK: %[[EXE:.+]] = util.global.load @executable
+ %exe = util.global.load @executable : !hal.executable
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
- hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None)
hal.command_buffer.execution_barrier<%cmd1 : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
- hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c1] workgroups([%c2, %c2, %c2]) flags(None)
+ hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c1] workgroups([%c2, %c2, %c2]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None)
- hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c2] workgroups([%c1, %c1, %c1]) flags(None)
- hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c3] workgroups([%c2, %c2, %c2]) flags(None)
+ hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c2] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None)
+ hal.command_buffer.dispatch<%cmd2 : !hal.command_buffer> target(%exe : !hal.executable)[%c3] workgroups([%c2, %c2, %c2]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None)
hal.command_buffer.execution_barrier<%cmd2 : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
util.return
@@ -46,20 +47,21 @@
// -----
-util.global @_executable : !hal.executable
+util.global @executable : !hal.executable
// CHECK-LABEL: @nested_dispatch
// CHECK-SAME: (%[[CMD1:.+]]: !hal.command_buffer,
+// CHECK-SAME: %[[BUFFER:.+]]: !hal.buffer,
// CHECK-SAME: %[[IDX:.+]]: index)
-util.func public @nested_dispatch(%cmd1 : !hal.command_buffer, %idx : index) {
- // CHECK: %[[EXE:.+]] = util.global.load @_executable
- %exe = util.global.load @_executable : !hal.executable
+util.func public @nested_dispatch(%cmd1: !hal.command_buffer, %buffer: !hal.buffer, %idx: index) {
+ // CHECK: %[[EXE:.+]] = util.global.load @executable
+ %exe = util.global.load @executable : !hal.executable
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
scf.index_switch %idx
case 0 {
- hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) flags(None)
+ hal.command_buffer.dispatch<%cmd1 : !hal.command_buffer> target(%exe : !hal.executable)[%c0] workgroups([%c1, %c1, %c1]) bindings([(%buffer : !hal.buffer)[%c0, %c1]]) flags(None)
scf.yield
}
default {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
index 5ce9f72..45658e9 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/resolve_export_ordinals.mlir
@@ -2,19 +2,15 @@
hal.executable @exe0 {
hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) {
- hal.executable.export @entry123 ordinal(123) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+ hal.executable.export @entry123 ordinal(123) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
hal.executable @exe1 {
hal.executable.variant @target target(<"vmvx", "vmvx-bytecode-fb">) {
- hal.executable.export @entry456 ordinal(456) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+ hal.executable.export @entry456 ordinal(456) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>)
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir
index ef9bff7..7cc2a58 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/strip_executable_contents.mlir
@@ -5,11 +5,9 @@
// CHECK: hal.executable.variant public @backend
hal.executable.variant @backend target(#hal.executable.target<"backend", "format">) {
// CHECK: hal.executable.export public @entry0
- hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+ hal.executable.export @entry0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>)
// CHECK-NOT: builtin.module
builtin.module {
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir
index ba2baad..19b229b 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables.mlir
@@ -7,7 +7,9 @@
// CHECK: hal.executable private @executable0
hal.executable private @executable0 {
hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
// CHECK: arith.constant 123
%c1 = arith.constant 1 : index
@@ -34,7 +36,9 @@
// CHECK-SAME: path = "substitute_executables_replacement.obj",
// CHECK-SAME: data = dense<[72, 69, 76, 76, 79, 33,
hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @dispatch1 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch1 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
// CHECK: arith.constant 100 : index
%c100 = arith.constant 100 : index
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir
index 7c96db9..d12d3d1 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir
+++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/test/substitute_executables_replacement.mlir
@@ -1,7 +1,9 @@
// Replacement executable for substitute_executables.mlir.
hal.executable private @executable0 {
hal.executable.variant public @variant target(<"cuda", "cuda-nvptx-fb">) {
- hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer>]>]>) {
+ hal.executable.export public @dispatch0 ordinal(0) layout(#hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c123 = arith.constant 123 : index
hal.return %c123, %c123, %c123 : index, index, index
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
index 7f17192..2f1aa4f 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/ConvertHALToVMVX.cpp
@@ -224,9 +224,7 @@
.replaceOpWithNewOp<IREE::Util::ListGetOp>(
op, bindingType, bindingsArg,
rewriter.createOrFold<arith::ConstantIndexOp>(
- op.getLoc(), op.getLayout().getFlatBindingIndex(
- op.getSet().getSExtValue(),
- op.getBinding().getSExtValue())))
+ op.getLoc(), op.getBinding().getSExtValue()))
.getResult();
return success();
}
@@ -249,12 +247,13 @@
IndexSet indexSet(op.getLoc(), rewriter);
auto bindingType = llvm::cast<IREE::Util::ListType>(bindingsArg.getType())
.getElementType();
- auto sourceBuffer = rewriter
- .create<IREE::Util::ListGetOp>(
- op.getLoc(), bindingType, bindingsArg,
- rewriter.createOrFold<arith::ConstantIndexOp>(
- op.getLoc(), op.getFlatBindingIndex()))
- .getResult();
+ auto sourceBuffer =
+ rewriter
+ .create<IREE::Util::ListGetOp>(
+ op.getLoc(), bindingType, bindingsArg,
+ rewriter.createOrFold<arith::ConstantIndexOp>(
+ op.getLoc(), op.getBinding().getSExtValue()))
+ .getResult();
if (op.getByteOffset() && !matchPattern(op.getByteOffset(), m_Zero())) {
// Offsetted binding: replace with a BufferSubspanOp.
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir
index dabea33..91a8bb6 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Conversion/HALToVMVX/test/interface_ops.mlir
@@ -1,10 +1,8 @@
// RUN: iree-opt --split-input-file --iree-vmvx-conversion --canonicalize %s | FileCheck %s
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// CHECK: util.global private @__constant_5xi32 : !util.buffer
@@ -33,9 +31,9 @@
%c1 = arith.constant 1 : index
%0 = memref.get_global @__constant_5xi32 : memref<5xi32>
// CHECK: %[[BINDING0:.+]] = util.list.get %[[BINDINGS]][%c0] : !util.list<!util.buffer>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<5xf32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<5xf32>
// CHECK: %[[BINDING1:.+]] = util.list.get %[[BINDINGS]][%c1] : !util.list<!util.buffer>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) : memref<5xi32>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) : memref<5xi32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td
index 218a38f..6321e1c 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td
+++ b/compiler/src/iree/compiler/Dialect/VMVX/IR/VMVXOps.td
@@ -73,7 +73,6 @@
}];
let arguments = (ins
HAL_PipelineLayoutAttr:$layout,
- IndexAttr:$set,
IndexAttr:$binding
);
let results = (outs
@@ -81,7 +80,6 @@
);
let assemblyFormat = [{
`layout` `(` $layout `)`
- `set` `(` $set `)`
`binding` `(` $binding `)`
attr-dict
}];
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
index 5d38c4f..cc40323 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/ResolveBufferDescriptors.cpp
@@ -315,7 +315,7 @@
rewriter
.create<IREE::VMVX::GetRawInterfaceBindingBufferOp>(
loc, op.getBaseBuffer().getType(), binding.getLayout(),
- binding.getSetAttr(), binding.getBindingAttr())
+ binding.getBindingAttr())
.getResult());
rewriter.eraseOp(op);
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
index f2b1ffc..e810002 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/resolve_buffer_descriptors.mlir
@@ -60,15 +60,13 @@
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_zero_offset() -> (!util.buffer, index, index, index, index, index) {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<512x384xf32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<512x384xf32>
%base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<512x384xf32> -> !util.buffer, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index
}
@@ -77,19 +75,17 @@
// CHECK-DAG: %[[C384:.+]] = arith.constant 384 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0)
+// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) binding(0)
// CHECK: return %[[CAST]], %[[C0]], %[[C512]], %[[C384]], %[[C384]], %[[C1]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_offset_index(%arg0 : index) -> (!util.buffer, index, index, index, index, index) {
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%arg0) : memref<512x384xindex>
%base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<512x384xindex> -> !util.buffer, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index
}
@@ -100,26 +96,24 @@
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[INDEX_SIZE:.+]] = util.sizeof index
// CHECK-DAG: %[[OFFSET:.+]] = affine.apply #map()[%arg0, %[[INDEX_SIZE]]]
-// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0)
+// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) binding(0)
// CHECK: return %[[CAST]], %[[OFFSET]], %[[C512]], %[[C384]], %[[C384]], %[[C1]]
// -----
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
func.func @resolve_binding_subspan_dyn_dims(%arg0 : index, %arg1 : index) -> (!util.buffer, index, index, index, index, index) {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<?x?xindex>{%arg0, %arg1}
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<?x?xindex>{%arg0, %arg1}
%base_buffer, %offset, %sizes:2, %strides:2 = vmvx.get_buffer_descriptor %0 : memref<?x?xindex> -> !util.buffer, index, index, index, index, index
return %base_buffer, %offset, %sizes#0, %sizes#1, %strides#0, %strides#1 : !util.buffer, index, index, index, index, index
}
// CHECK: func @resolve_binding_subspan_dyn_dims(
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) set(0) binding(0)
+// CHECK: %[[CAST:.+]] = vmvx.get_raw_interface_binding_buffer layout({{.+}}) binding(0)
// CHECK: return %[[CAST]], %{{.+}}, %arg0, %arg1, %arg1, %[[C1]]
// -----
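For anyone migrating downstream IR by hand, the mechanical pattern repeated across the test updates above condenses to the following before/after. This only restates the hunks in this PR; no new syntax is introduced:

```mlir
// Before: explicit set layouts with per-binding ordinals.
#layout_old = #hal.pipeline.layout<push_constants = 0, sets = [
  #hal.descriptor_set.layout<0, bindings = [
    #hal.descriptor_set.binding<0, storage_buffer>
  ]>
]>
// %0 = hal.interface.binding.subspan layout(#layout_old) set(0) binding(0) ...

// After: a flat binding list with implicit ordinals; `set(...)` disappears
// from hal.interface.binding.subspan and the constant count is optional.
#layout_new = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer>
]>
// %0 = hal.interface.binding.subspan layout(#layout_new) binding(0) ...
```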
diff --git a/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp b/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp
index 1db69fc..f0b0322 100644
--- a/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp
+++ b/compiler/src/iree/compiler/DispatchCreation/CollapseDimensions.cpp
@@ -739,7 +739,7 @@
FailureOr<AffineMap> consumerToProducerMap =
getConsumerLoopToProducerLoopsMap(*operand);
if (failed(consumerToProducerMap)) {
- didChange |= producerInfo.getCollapsibleLoops().size();
+ didChange |= !producerInfo.getCollapsibleLoops().empty();
producerInfo.clear();
continue;
}
diff --git a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
index 7b950f7..de1f736 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
+++ b/compiler/src/iree/compiler/InputConversion/Common/IREEImportPublic.cpp
@@ -94,43 +94,34 @@
}
}
-static IREE::HAL::DescriptorSetBindingAttr
-convertDescriptorSetBinding(IREE::Input::DescriptorSetBindingAttr src) {
- return IREE::HAL::DescriptorSetBindingAttr::get(
- src.getContext(), src.getOrdinal(), convertDescriptorType(src.getType()),
+static IREE::HAL::PipelineBindingAttr
+convertPipelineBinding(IREE::Input::PipelineBindingAttr src) {
+ return IREE::HAL::PipelineBindingAttr::get(
+ src.getContext(), convertDescriptorType(src.getType()),
convertDescriptorFlags(src.getFlags()));
}
-static std::optional<IREE::HAL::DescriptorSetLayoutFlags>
-convertDescriptorSetLayoutFlags(
- std::optional<IREE::Input::DescriptorSetLayoutFlags> src) {
+static std::optional<IREE::HAL::PipelineLayoutFlags> convertPipelineLayoutFlags(
+ std::optional<IREE::Input::PipelineLayoutFlags> src) {
if (!src.has_value())
return std::nullopt;
switch (*src) {
- case IREE::Input::DescriptorSetLayoutFlags::None:
- return IREE::HAL::DescriptorSetLayoutFlags::None;
- case IREE::Input::DescriptorSetLayoutFlags::Indirect:
- return IREE::HAL::DescriptorSetLayoutFlags::Indirect;
+ case IREE::Input::PipelineLayoutFlags::None:
+ return IREE::HAL::PipelineLayoutFlags::None;
+ case IREE::Input::PipelineLayoutFlags::Indirect:
+ return IREE::HAL::PipelineLayoutFlags::Indirect;
default:
return std::nullopt;
}
}
-static IREE::HAL::DescriptorSetLayoutAttr
-convertDescriptorSetLayout(IREE::Input::DescriptorSetLayoutAttr src) {
- return IREE::HAL::DescriptorSetLayoutAttr::get(
- src.getContext(), src.getOrdinal(),
- convertAttributes<IREE::HAL::DescriptorSetBindingAttr>(
- src.getBindings(), convertDescriptorSetBinding),
- convertDescriptorSetLayoutFlags(src.getFlags()));
-}
-
static IREE::HAL::PipelineLayoutAttr
convertPipelineLayout(IREE::Input::PipelineLayoutAttr src) {
return IREE::HAL::PipelineLayoutAttr::get(
- src.getContext(), src.getPushConstants(),
- convertAttributes<IREE::HAL::DescriptorSetLayoutAttr>(
- src.getSetLayouts(), convertDescriptorSetLayout));
+ src.getContext(),
+ convertAttributes<IREE::HAL::PipelineBindingAttr>(src.getBindings(),
+ convertPipelineBinding),
+ src.getConstants(), convertPipelineLayoutFlags(src.getFlags()));
}
static IREE::HAL::ExecutableObjectAttr
diff --git a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
index d9cff44..0c48dbc 100644
--- a/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
+++ b/compiler/src/iree/compiler/InputConversion/Common/test/iree_import_public.mlir
@@ -385,11 +385,11 @@
// -----
// CHECK: #[[PTX:.*]] = #hal.executable.target<"cuda", "cuda-nvptx-fb">
-// CHECK: #[[LAYOUT:.*]] = #hal.pipeline.layout<push_constants = 1,
-// CHECK-SAME: sets = [<0, bindings = [
-// CHECK-SAME: <0, storage_buffer, ReadOnly>,
-// CHECK-SAME: <1, storage_buffer>
-// CHECK-SAME: ]>]>
+// CHECK: #[[LAYOUT:.*]] = #hal.pipeline.layout<constants = 1,
+// CHECK-SAME: bindings = [
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// CHECK-SAME: #hal.pipeline.binding<storage_buffer>
+// CHECK-SAME: ]>
// CHECK: hal.executable.source private @executable
// CHECK-SAME: {objects = #hal.executable.objects<{
@@ -409,11 +409,9 @@
}>
} {
iree_input.executable.export public @add ordinal(0)
- layout(#iree_input.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#iree_input.pipeline.layout<constants = 1, bindings = [
+ #iree_input.pipeline.binding<storage_buffer, ReadOnly>,
+ #iree_input.pipeline.binding<storage_buffer>
]>) attributes {
workgroup_size = [64 : index, 1 : index, 1 : index]
}
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
index a048e79..a4f60ef 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/InlineExecutables.cpp
@@ -104,17 +104,14 @@
// Build dispatch function signature that the stream.cmd.dispatch ops will
// map to.
auto layoutAttr = exportOp.getLayout();
- size_t totalBindingCount = 0;
- for (auto setLayout : layoutAttr.getSetLayouts()) {
- totalBindingCount += setLayout.getBindings().size();
- }
+ size_t bindingCount = layoutAttr.getBindings().size();
SmallVector<Type> inputTypes;
inputTypes.append(exportOp.getWorkgroupCountBody()->getNumArguments() - 1,
indexType); // workload
- inputTypes.append(layoutAttr.getPushConstants(), i32Type);
- inputTypes.append(totalBindingCount, bufferType); // buffers
- inputTypes.append(totalBindingCount, indexType); // offsets
- inputTypes.append(totalBindingCount, indexType); // lengths
+ inputTypes.append(layoutAttr.getConstants(), i32Type);
+ inputTypes.append(bindingCount, bufferType); // buffers
+ inputTypes.append(bindingCount, indexType); // offsets
+ inputTypes.append(bindingCount, indexType); // lengths
auto dispatchFuncType =
innerModuleBuilder.getFunctionType(inputTypes, {});
@@ -136,13 +133,13 @@
return exportOp.emitOpError("missing body function");
}
if (bodyFuncOp.isPublic()) {
- if (failed(rewriteWorkgroupSignature(layoutAttr, totalBindingCount,
+ if (failed(rewriteWorkgroupSignature(layoutAttr, bindingCount,
bodyFuncOp))) {
return failure();
}
bodyFuncOp.setPrivate(); // so we only do it once
}
- buildDispatchFunc(exportOp, layoutAttr, totalBindingCount, bodyFuncOp,
+ buildDispatchFunc(exportOp, layoutAttr, bindingCount, bodyFuncOp,
dispatchFuncOp);
// Map from what the stream.cmd.dispatch ops is using to the new function.
@@ -185,7 +182,7 @@
// about the function signatures.
LogicalResult
rewriteWorkgroupSignature(IREE::HAL::PipelineLayoutAttr layoutAttr,
- size_t totalBindingCount,
+ size_t bindingCount,
FunctionOpInterface bodyFuncOp) {
auto *entryBlock = &bodyFuncOp.front();
auto builder = OpBuilder::atBlockBegin(entryBlock);
@@ -209,10 +206,10 @@
// Expand push constants by replacing buffer accesses with the flattened
// args.
- newArgTypes.append(layoutAttr.getPushConstants(), i32Type);
+ newArgTypes.append(layoutAttr.getConstants(), i32Type);
auto constantBuffer = entryBlock->getArgument(argOffset++);
SmallVector<Value> constantArgs;
- for (unsigned i = 0; i < layoutAttr.getPushConstants(); ++i) {
+ for (unsigned i = 0; i < layoutAttr.getConstants(); ++i) {
constantArgs.push_back(
entryBlock->addArgument(i32Type, constantBuffer.getLoc()));
}
@@ -221,10 +218,10 @@
}
// Expand buffer list by replacing list accesses with the flattened args.
- newArgTypes.append(totalBindingCount, bufferType);
+ newArgTypes.append(bindingCount, bufferType);
auto bindingList = entryBlock->getArgument(argOffset++);
SmallVector<Value> bindingArgs;
- for (unsigned i = 0; i < totalBindingCount; ++i) {
+ for (unsigned i = 0; i < bindingCount; ++i) {
bindingArgs.push_back(
entryBlock->addArgument(bufferType, bindingList.getLoc()));
}
@@ -329,7 +326,7 @@
// Builds a function that calls a workgroup body and marshals arguments.
//
// Incoming:
- // (workload..., push_constants...,
+ // (workload..., constants...,
// binding_buffers..., binding_offsets..., binding_lengths...)
// Body (as translated):
// (local_memory, [constants], [bindings],
@@ -338,8 +335,7 @@
// workgroup_count_x, workgroup_count_y, workgroup_count_z)
void buildDispatchFunc(IREE::HAL::ExecutableExportOp exportOp,
IREE::HAL::PipelineLayoutAttr layoutAttr,
- size_t totalBindingCount,
- FunctionOpInterface bodyFuncOp,
+ size_t bindingCount, FunctionOpInterface bodyFuncOp,
FunctionOpInterface dispatchFuncOp) {
auto loc = exportOp.getLoc();
auto builder = OpBuilder::atBlockBegin(dispatchFuncOp.addEntryBlock());
@@ -369,18 +365,18 @@
workgroupArgs.push_back(localMemory);
// Pass all constants through.
- for (int64_t i = 0; i < layoutAttr.getPushConstants(); ++i) {
+ for (int64_t i = 0; i < layoutAttr.getConstants(); ++i) {
workgroupArgs.push_back(dispatchFuncOp.getArgument(argOffset++));
}
// Pass all buffers through as subspans with the binding offset and length
// factored in. IPO can propagate the subspans (hopefully).
- for (size_t i = 0; i < totalBindingCount; ++i) {
+ for (size_t i = 0; i < bindingCount; ++i) {
auto bindingBuffer = dispatchFuncOp.getArgument(argOffset + i);
auto bindingOffset =
- dispatchFuncOp.getArgument(argOffset + totalBindingCount + i);
- auto bindingLength = dispatchFuncOp.getArgument(
- argOffset + totalBindingCount + totalBindingCount + i);
+ dispatchFuncOp.getArgument(argOffset + bindingCount + i);
+ auto bindingLength = dispatchFuncOp.getArgument(argOffset + bindingCount +
+ bindingCount + i);
Value bufferSize =
builder.create<IREE::Util::BufferSizeOp>(loc, bindingBuffer);
Value bindingView = builder.create<IREE::Util::BufferSubspanOp>(
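To make the argument marshaling above concrete: for a layout like the one in the inline_executables.mlir test below (`constants = 2`, three storage-buffer bindings, a two-dimensional workload), the dispatch function built here takes roughly the signature sketched next. This only illustrates the flattening order; the argument names are invented for illustration:

```mlir
// (workload..., constants..., binding_buffers..., binding_offsets..., binding_lengths...)
func.func private @dispatch_0(
    %workload_x: index, %workload_y: index,
    %constant0: i32, %constant1: i32,
    %buffer0: !util.buffer, %buffer1: !util.buffer, %buffer2: !util.buffer,
    %offset0: index, %offset1: index, %offset2: index,
    %length0: index, %length1: index, %length2: index)
```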
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
index 1ee7e25..4dff570 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
@@ -8,14 +8,11 @@
hal.executable private @ex {
hal.executable.variant public @vmvx_ir target(<"vmvx-inline", "vmvx-ir">) {
hal.executable.export public @dispatch_0 ordinal(0) layout(
- #hal.pipeline.layout<push_constants = 2,
- sets = [
- <0, bindings = [
- <0, storage_buffer>,
- <1, storage_buffer>,
- <2, storage_buffer>
- ]>
- ]>) {
+ #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%arg0: !hal.device, %workload_x: index, %workload_y: index):
%count_x = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%workload_x]
%count_y = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%workload_y]
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
index 8574102..687830d 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/HALLoaderToVM/Patterns.cpp
@@ -89,19 +89,19 @@
castToI32(adaptor.getWorkgroupY(), rewriter),
castToI32(adaptor.getWorkgroupZ(), rewriter),
};
- auto pushConstants = adaptor.getPushConstants();
+ auto constants = adaptor.getConstants();
SmallVector<int16_t, 5> segmentSizes = {
/*executable=*/-1,
/*entry_point=*/-1,
/*workgroup_x=*/-1,
/*workgroup_y=*/-1,
/*workgroup_z=*/-1,
- /*push_constants=*/
- static_cast<int16_t>(pushConstants.size()),
+ /*constants=*/
+ static_cast<int16_t>(constants.size()),
/*bindings=*/
static_cast<int16_t>(adaptor.getBindingBuffers().size()),
};
- callOperands.append(pushConstants.begin(), pushConstants.end());
+ callOperands.append(constants.begin(), constants.end());
for (auto [bindingBuffer, bindingOffset, bindingLength] : llvm::zip_equal(
adaptor.getBindingBuffers(), adaptor.getBindingOffsets(),
adaptor.getBindingLengths())) {
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
index a767492..9370e09 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/Conversion/StreamToHALLoader/test/cmd_ops.mlir
@@ -3,13 +3,9 @@
// NOTE: all other stream.cmd.* ops are handled by the hal_inline conversions.
// Executables are required to translate the dispatch calls.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<4, storage_buffer>
- ]>,
- #hal.descriptor_set.layout<1, bindings = [
- #hal.descriptor_set.binding<5, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable private @ex {
hal.executable.variant public @variant target(#hal.executable.target<"llvm", "embedded-elf-x86_64">) {
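One detail worth noting in this test: the old layout used sparse binding ordinals (4 in set 0, 5 in set 1). With ordinals now implicit, those bindings occupy positions 0 and 1 of the flat list, so IR that referred to the old ordinals presumably needs renumbering rather than a pure syntax rewrite. A minimal sketch of the correspondence, using hypothetical subspan ops for illustration:

```mlir
#pipeline_layout = #hal.pipeline.layout<bindings = [
  #hal.pipeline.binding<storage_buffer>,  // was set 0, binding 4
  #hal.pipeline.binding<storage_buffer>   // was set 1, binding 5
]>
// old: hal.interface.binding.subspan layout(...) set(0) binding(4) ...
// new: hal.interface.binding.subspan layout(#pipeline_layout) binding(0) ...
```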
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td
index aa045b5..ac7bb0e 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/IR/HALLoaderOps.td
@@ -144,7 +144,7 @@
HAL_Dim:$workgroup_x,
HAL_Dim:$workgroup_y,
HAL_Dim:$workgroup_z,
- Variadic<I32>:$push_constants,
+ Variadic<I32>:$constants,
Variadic<Util_BufferType>:$binding_buffers,
Variadic<HAL_DeviceSize>:$binding_offsets,
Variadic<HAL_DeviceSize>:$binding_lengths
@@ -158,7 +158,7 @@
$workgroup_y `,`
$workgroup_z
`]` `)`
- (`constants` `(` `[` $push_constants^ `]` `)`)?
+ (`constants` `(` `[` $constants^ `]` `)`)?
`bindings` `(` `[`
custom<DispatchBindings>($binding_buffers,
type($binding_buffers),
diff --git a/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir b/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir
index d811bc7..a76e2d2 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Loader/hal_loader.imports.mlir
@@ -32,7 +32,7 @@
%workgroup_x : i32,
%workgroup_y : i32,
%workgroup_z : i32,
- %push_constants : i32 ...,
+ %constants : i32 ...,
// <buffer, offset, length>
%bindings : tuple<!vm.buffer, i64, i64>...
)
diff --git a/docs/website/docs/community/blog/posts/microkernels.md b/docs/website/docs/community/blog/posts/microkernels.md
index a948807..7e14195 100644
--- a/docs/website/docs/community/blog/posts/microkernels.md
+++ b/docs/website/docs/community/blog/posts/microkernels.md
@@ -467,9 +467,9 @@
%53 = arith.shli %52, %c32_i64 : i64
%54 = arith.ori %51, %53 : i64
%55 = arith.index_castui %54 : i64 to index
- %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%30, %35}
- %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%40, %45}
- %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%50, %55}
+ %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%30, %35}
+ %57 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x?x16x1xf32>>{%40, %45}
+ %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : !flow.dispatch.tensor<readwrite:tensor<?x?x16x16xf32>>{%50, %55}
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
@@ -566,11 +566,11 @@
%53 = arith.shli %52, %c32_i64 : i64
%54 = arith.ori %51, %53 : i64
%55 = arith.index_castui %54 : i64 to index
- %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?x?x16x1xf32, #hal.descriptor_type<storage_buffer>>{%30, %35}
+ %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?x?x16x1xf32, #hal.descriptor_type<storage_buffer>>{%30, %35}
memref.assume_alignment %56, 64 : memref<?x?x16x1xf32, #hal.descriptor_type<storage_buffer>>
- %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%40, %45}
+ %57 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%40, %45}
memref.assume_alignment %57, 1 : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
- %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%50, %55}
+ %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>{%50, %55}
memref.assume_alignment %58, 1 : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
@@ -657,11 +657,11 @@
%53 = arith.shli %52, %c32_i64 : i64
%54 = arith.ori %51, %53 : i64
%55 = arith.index_castui %54 : i64 to index
- %56 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?x?x16x1xf32>{%30, %35}
+ %56 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : memref<?x?x16x1xf32>{%30, %35}
memref.assume_alignment %56, 64 : memref<?x?x16x1xf32>
- %57 = hal.interface.binding.subspan layout(#layout) set(0) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>>{%40, %45}
+ %57 = hal.interface.binding.subspan layout(#layout) binding(0) alignment(64) offset(%20) flags(ReadOnly) : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>>{%40, %45}
memref.assume_alignment %57, 1 : memref<?x?x16x1xf32, strided<[?, 16, 1, 1], offset: ?>>
- %58 = hal.interface.binding.subspan layout(#layout) set(0) binding(1) alignment(64) offset(%25) : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>>{%50, %55}
+ %58 = hal.interface.binding.subspan layout(#layout) binding(1) alignment(64) offset(%25) : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>>{%50, %55}
memref.assume_alignment %58, 1 : memref<?x?x16x16xf32, strided<[?, 256, 16, 1], offset: ?>>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/experimental/webgpu/command_buffer.c b/experimental/webgpu/command_buffer.c
index 1654beb..9240320 100644
--- a/experimental/webgpu/command_buffer.c
+++ b/experimental/webgpu/command_buffer.c
@@ -147,12 +147,12 @@
// Currently open pass - NULL if no open pass.
WGPUComputePassEncoder compute_pass;
- // All available push constants updated each time push_constants is called.
+  // All available push constants, updated each time constants are set.
// Reset only with the command buffer and otherwise will maintain its values
- // during recording to allow for partial push_constants updates.
- uint32_t push_constants[IREE_HAL_WEBGPU_MAX_PUSH_CONSTANT_COUNT];
+  // during recording to allow for partial constant updates.
+ uint32_t constants[IREE_HAL_WEBGPU_MAX_PUSH_CONSTANT_COUNT];
- // TODO(benvanik): add a push_constants dirty bit so we know if we need to
+ // TODO(benvanik): add a constants dirty bit so we know if we need to
// upload more. Today we'll stage the same values for each dispatch.
// Snapshot of descriptor sets as populated by push_descriptor_set.
@@ -750,7 +750,7 @@
return iree_ok_status();
}
-static iree_status_t iree_hal_webgpu_command_buffer_push_constants(
+static iree_status_t iree_hal_webgpu_command_buffer_constants(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_pipeline_layout_t* pipeline_layout, iree_host_size_t offset,
const void* values, iree_host_size_t values_length) {
@@ -758,7 +758,7 @@
iree_hal_webgpu_command_buffer_cast(base_command_buffer);
if (IREE_UNLIKELY(offset + values_length >=
- sizeof(command_buffer->state.push_constants))) {
+ sizeof(command_buffer->state.constants))) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"push constant range %" PRIhsz " (length=%" PRIhsz
") out of range",
@@ -766,7 +766,7 @@
}
// NOTE: command buffer state change only; enqueues no tasks.
- memcpy((uint8_t*)&command_buffer->state.push_constants + offset, values,
+ memcpy((uint8_t*)&command_buffer->state.constants + offset, values,
values_length);
return iree_ok_status();
@@ -819,14 +819,14 @@
// Upload push constant data - this may incur a segment flush if the staging
// buffer is exhausted.
- iree_host_size_t push_constant_count =
- iree_hal_webgpu_pipeline_layout_push_constant_count(entry_point->layout);
- iree_const_byte_span_t push_constant_data = iree_make_const_byte_span(
- command_buffer->state.push_constants,
- push_constant_count * sizeof(command_buffer->state.push_constants[0]));
+ iree_host_size_t constant_count =
+ iree_hal_webgpu_pipeline_layout_constant_count(entry_point->layout);
+ iree_const_byte_span_t constant_data = iree_make_const_byte_span(
+ command_buffer->state.constants,
+ constant_count * sizeof(command_buffer->state.constants[0]));
uint32_t params_offset = 0;
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_append_parameters(
-      command_buffer, push_constant_data, &params_offset));
+      command_buffer, constant_data, &params_offset));
// Acquire the compute pass we'll encode the dispatch into - this may be
// fresh or reused from prior commands.
@@ -835,7 +835,7 @@
command_buffer, &compute_pass));
wgpuComputePassEncoderSetPipeline(compute_pass, entry_point->pipeline);
- if (push_constant_count > 0) {
+ if (constant_count > 0) {
// Bind the push constant emulation bind group at the staging buffer
// relative offset for this dispatch.
wgpuComputePassEncoderSetBindGroup(
@@ -872,7 +872,7 @@
command_buffer->state.bind_groups_empty &= ~(1ull << i);
}
- if (push_constant_count > 0) {
+ if (constant_count > 0) {
// Pad up to IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX with empty bind groups.
WGPUBindGroup empty_handle =
command_buffer->staging_buffer->empty_bind_group;
@@ -1045,7 +1045,7 @@
.fill_buffer = iree_hal_webgpu_command_buffer_fill_buffer,
.update_buffer = iree_hal_webgpu_command_buffer_update_buffer,
.copy_buffer = iree_hal_webgpu_command_buffer_copy_buffer,
- .push_constants = iree_hal_webgpu_command_buffer_push_constants,
+ .constants = iree_hal_webgpu_command_buffer_constants,
.push_descriptor_set = iree_hal_webgpu_command_buffer_push_descriptor_set,
.dispatch = iree_hal_webgpu_command_buffer_dispatch,
.dispatch_indirect = iree_hal_webgpu_command_buffer_dispatch_indirect,
diff --git a/experimental/webgpu/pipeline_layout.c b/experimental/webgpu/pipeline_layout.c
index a5c940c..c55fcbe 100644
--- a/experimental/webgpu/pipeline_layout.c
+++ b/experimental/webgpu/pipeline_layout.c
@@ -159,7 +159,7 @@
iree_hal_resource_t resource;
iree_allocator_t host_allocator;
WGPUPipelineLayout handle;
- iree_host_size_t push_constant_count;
+ iree_host_size_t constant_count;
iree_hal_webgpu_set_binding_info_t set_binding_info;
iree_host_size_t set_layout_count;
iree_hal_descriptor_set_layout_t* set_layouts[];
@@ -177,7 +177,7 @@
iree_status_t iree_hal_webgpu_pipeline_layout_create(
WGPUDevice device, iree_host_size_t set_layout_count,
iree_hal_descriptor_set_layout_t* const* set_layouts,
- iree_host_size_t push_constant_count,
+ iree_host_size_t constant_count,
iree_hal_webgpu_staging_buffer_t* staging_buffer,
iree_allocator_t host_allocator,
iree_hal_pipeline_layout_t** out_pipeline_layout) {
@@ -198,8 +198,8 @@
// Pad to IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX for push constant emulation.
iree_host_size_t bind_group_layouts_count =
- push_constant_count > 0 ? IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX + 1
- : set_layout_count;
+ constant_count > 0 ? IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX + 1
+ : set_layout_count;
// Populate a WGPUBindGroupLayout array with the provided set layouts, then
// set the staging buffer's bind group layout at the right index, padding
@@ -215,7 +215,7 @@
*iree_inline_array_at(bind_group_layouts, i) =
staging_buffer->empty_bind_group_layout;
}
- if (push_constant_count > 0) {
+ if (constant_count > 0) {
*iree_inline_array_at(bind_group_layouts,
IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX) =
staging_buffer->bind_group_layout;
@@ -247,7 +247,7 @@
&pipeline_layout->resource);
pipeline_layout->host_allocator = host_allocator;
pipeline_layout->handle = handle;
- pipeline_layout->push_constant_count = push_constant_count;
+ pipeline_layout->constant_count = constant_count;
pipeline_layout->set_layout_count = set_layout_count;
pipeline_layout->set_binding_info.set_count = set_layout_count;
@@ -292,10 +292,10 @@
return iree_hal_webgpu_pipeline_layout_cast(layout)->handle;
}
-iree_host_size_t iree_hal_webgpu_pipeline_layout_push_constant_count(
+iree_host_size_t iree_hal_webgpu_pipeline_layout_constant_count(
iree_hal_pipeline_layout_t* layout) {
IREE_ASSERT_ARGUMENT(layout);
- return iree_hal_webgpu_pipeline_layout_cast(layout)->push_constant_count;
+ return iree_hal_webgpu_pipeline_layout_cast(layout)->constant_count;
}
const iree_hal_webgpu_set_binding_info_t*
diff --git a/experimental/webgpu/pipeline_layout.h b/experimental/webgpu/pipeline_layout.h
index e15b3bf..c620f9c 100644
--- a/experimental/webgpu/pipeline_layout.h
+++ b/experimental/webgpu/pipeline_layout.h
@@ -61,7 +61,7 @@
iree_status_t iree_hal_webgpu_pipeline_layout_create(
WGPUDevice device, iree_host_size_t set_layout_count,
iree_hal_descriptor_set_layout_t* const* set_layouts,
- iree_host_size_t push_constant_count,
+ iree_host_size_t constant_count,
iree_hal_webgpu_staging_buffer_t* staging_buffer,
iree_allocator_t host_allocator,
iree_hal_pipeline_layout_t** out_pipeline_layout);
@@ -69,7 +69,7 @@
WGPUPipelineLayout iree_hal_webgpu_pipeline_layout_handle(
iree_hal_pipeline_layout_t* layout);
-iree_host_size_t iree_hal_webgpu_pipeline_layout_push_constant_count(
+iree_host_size_t iree_hal_webgpu_pipeline_layout_constant_count(
iree_hal_pipeline_layout_t* layout);
const iree_hal_webgpu_set_binding_info_t*
diff --git a/experimental/webgpu/webgpu_device.c b/experimental/webgpu/webgpu_device.c
index 165dee3..5498caf 100644
--- a/experimental/webgpu/webgpu_device.c
+++ b/experimental/webgpu/webgpu_device.c
@@ -295,13 +295,13 @@
}
static iree_status_t iree_hal_webgpu_device_create_pipeline_layout(
- iree_hal_device_t* base_device, iree_host_size_t push_constants,
+ iree_hal_device_t* base_device, iree_host_size_t constants,
iree_host_size_t set_layout_count,
iree_hal_descriptor_set_layout_t* const* set_layouts,
iree_hal_pipeline_layout_t** out_pipeline_layout) {
iree_hal_webgpu_device_t* device = iree_hal_webgpu_device_cast(base_device);
return iree_hal_webgpu_pipeline_layout_create(
- device->handle, set_layout_count, set_layouts, push_constants,
+ device->handle, set_layout_count, set_layouts, constants,
&device->staging_buffer, device->host_allocator, out_pipeline_layout);
}
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td
index 526cdd6..223dcdc 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputBase.td
@@ -265,68 +265,49 @@
let cppNamespace = "::mlir::iree_compiler::IREE::Input";
}
-def IREEInput_DescriptorSetBindingAttr :
- AttrDef<IREEInput_Dialect, "DescriptorSetBinding", []> {
- let mnemonic = "descriptor_set.binding";
+def IREEInput_PipelineBindingAttr :
+ AttrDef<IREEInput_Dialect, "PipelineBinding", []> {
+ let mnemonic = "pipeline.binding";
let summary = [{descriptor set binding specification}];
let parameters = (ins
- AttrParameter<"int64_t", "">:$ordinal,
AttrParameter<"DescriptorType", "">:$type,
OptionalParameter<"std::optional<DescriptorFlags>">:$flags
);
let assemblyFormat = [{
- `<` $ordinal `,` $type (`,` $flags^)? `>`
+ `<` $type (`,` $flags^)? `>`
}];
}
-def IREEInput_DescriptorSetLayoutFlags_None :
+def IREEInput_PipelineLayoutFlags_None :
I32BitEnumAttrCase<"None", 0x0000>;
-def IREEInput_DescriptorSetLayoutFlags_Indirect :
+def IREEInput_PipelineLayoutFlags_Indirect :
I32BitEnumAttrCase<"Indirect", 0x0001>;
-def IREEInput_DescriptorSetLayoutFlagsAttr :
- I32BitEnumAttr<"DescriptorSetLayoutFlags", "valid DescriptorSetLayout flags", [
- IREEInput_DescriptorSetLayoutFlags_None,
- IREEInput_DescriptorSetLayoutFlags_Indirect,
+def IREEInput_PipelineLayoutFlagsAttr :
+ I32BitEnumAttr<"PipelineLayoutFlags", "valid PipelineLayout flags", [
+ IREEInput_PipelineLayoutFlags_None,
+ IREEInput_PipelineLayoutFlags_Indirect,
]> {
let cppNamespace = "::mlir::iree_compiler::IREE::Input";
}
-def IREEInput_DescriptorSetLayoutAttr :
- AttrDef<IREEInput_Dialect, "DescriptorSetLayout", []> {
- let mnemonic = "descriptor_set.layout";
- let summary = [{descriptor set layout specification}];
-
- let parameters = (ins
- AttrParameter<"int64_t", "">:$ordinal,
- ArrayRefParameter<"DescriptorSetBindingAttr", "">:$bindings,
- OptionalParameter<"std::optional<DescriptorSetLayoutFlags>">:$flags
- );
-
- let assemblyFormat = [{
- `<`
- $ordinal `,`
- `bindings` `=` `[` $bindings `]`
- (`,` `flags` `=` $flags^)?
- `>`
- }];
-}
-
def IREEInput_PipelineLayoutAttr :
AttrDef<IREEInput_Dialect, "PipelineLayout", []> {
let mnemonic = "pipeline.layout";
let summary = [{executable entry point layout specification}];
let parameters = (ins
- AttrParameter<"int64_t", "">:$pushConstants,
- ArrayRefParameter<"DescriptorSetLayoutAttr", "">:$setLayouts
+ ArrayRefParameter<"PipelineBindingAttr", "">:$bindings,
+ OptionalParameter<"int64_t", "0">:$constants,
+ OptionalParameter<"std::optional<PipelineLayoutFlags>">:$flags
);
let assemblyFormat = [{
`<`
- `push_constants` `=` $pushConstants `,`
- `sets` `=` `[` $setLayouts `]`
+ (`constants` `=` $constants^ `,`)?
+ `bindings` `=` `[` qualified($bindings) `]`
+ (`,` `flags` `=` $flags^)?
`>`
}];
}
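Putting the optional fields together, an attribute using the new assembly format prints roughly as below (a sketch based on the format above; the specific constant count, binding flags, and layout flags are illustrative):

```mlir
#iree_input.pipeline.layout<constants = 1, bindings = [
  #iree_input.pipeline.binding<storage_buffer, ReadOnly>,
  #iree_input.pipeline.binding<storage_buffer>
], flags = Indirect>
```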
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h
index 41ecf9f..0ae1d30 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/Input/InputDialect.h
@@ -48,24 +48,24 @@
template <>
struct FieldParser<
- std::optional<mlir::iree_compiler::IREE::Input::DescriptorSetLayoutFlags>> {
- static FailureOr<mlir::iree_compiler::IREE::Input::DescriptorSetLayoutFlags>
+ std::optional<mlir::iree_compiler::IREE::Input::PipelineLayoutFlags>> {
+ static FailureOr<mlir::iree_compiler::IREE::Input::PipelineLayoutFlags>
parse(AsmParser &parser) {
std::string value;
if (parser.parseKeywordOrString(&value))
return failure();
auto result = mlir::iree_compiler::IREE::Input::symbolizeEnum<
- mlir::iree_compiler::IREE::Input::DescriptorSetLayoutFlags>(value);
+ mlir::iree_compiler::IREE::Input::PipelineLayoutFlags>(value);
if (!result.has_value())
return failure();
return result.value();
}
};
-static inline AsmPrinter &operator<<(
- AsmPrinter &printer,
- std::optional<mlir::iree_compiler::IREE::Input::DescriptorSetLayoutFlags>
- param) {
+static inline AsmPrinter &
+operator<<(AsmPrinter &printer,
+ std::optional<mlir::iree_compiler::IREE::Input::PipelineLayoutFlags>
+ param) {
printer << (param.has_value()
? mlir::iree_compiler::IREE::Input::stringifyEnum(
param.value())
diff --git a/runtime/src/iree/hal/cts/CMakeLists.txt b/runtime/src/iree/hal/cts/CMakeLists.txt
index 715f02e..1e7ea8f 100644
--- a/runtime/src/iree/hal/cts/CMakeLists.txt
+++ b/runtime/src/iree/hal/cts/CMakeLists.txt
@@ -10,8 +10,8 @@
"command_buffer"
"command_buffer_copy_buffer"
"command_buffer_dispatch"
+ "command_buffer_dispatch_constants"
"command_buffer_fill_buffer"
- "command_buffer_push_constants"
"command_buffer_update_buffer"
"driver"
"event"
@@ -27,7 +27,7 @@
# connected to a functional compiler target, these tests can be skipped.
set(IREE_EXECUTABLE_CTS_TESTS
"command_buffer_dispatch"
- "command_buffer_push_constants"
+ "command_buffer_dispatch_constants"
"executable_cache"
PARENT_SCOPE
)
@@ -35,7 +35,7 @@
# List of testdata/{name}.mlir source files.
set(IREE_ALL_CTS_EXECUTABLE_SOURCES
"command_buffer_dispatch_test"
- "command_buffer_push_constants_test"
+ "command_buffer_dispatch_constants_test"
"executable_cache_test"
PARENT_SCOPE
)
@@ -120,9 +120,9 @@
iree_cc_library(
NAME
- command_buffer_fill_buffer_test_library
+ command_buffer_dispatch_constants_test_library
HDRS
- "command_buffer_fill_buffer_test.h"
+ "command_buffer_dispatch_constants_test.h"
DEPS
::cts_test_base
iree::base
@@ -133,9 +133,9 @@
iree_cc_library(
NAME
- command_buffer_push_constants_test_library
+ command_buffer_fill_buffer_test_library
HDRS
- "command_buffer_push_constants_test.h"
+ "command_buffer_fill_buffer_test.h"
DEPS
::cts_test_base
iree::base
diff --git a/runtime/src/iree/hal/cts/command_buffer_push_constants_test.h b/runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h
similarity index 91%
rename from runtime/src/iree/hal/cts/command_buffer_push_constants_test.h
rename to runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h
index 3652082..d53c7b6 100644
--- a/runtime/src/iree/hal/cts/command_buffer_push_constants_test.h
+++ b/runtime/src/iree/hal/cts/command_buffer_dispatch_constants_test.h
@@ -4,8 +4,8 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#ifndef IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_
-#define IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_
+#ifndef IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_
+#define IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_
#include "iree/base/api.h"
#include "iree/base/string_view.h"
@@ -18,7 +18,7 @@
using ::testing::ContainerEq;
-class CommandBufferPushConstantsTest : public CTSTestBase<> {
+class CommandBufferDispatchConstantsTest : public CTSTestBase<> {
protected:
void PrepareExecutable() {
IREE_ASSERT_OK(iree_hal_executable_cache_create(
@@ -32,7 +32,7 @@
executable_params.executable_format =
iree_make_cstring_view(get_test_executable_format());
executable_params.executable_data = get_test_executable_data(
- iree_make_cstring_view("command_buffer_push_constants_test.bin"));
+ iree_make_cstring_view("command_buffer_dispatch_constants_test.bin"));
// No executable-level "specialization constants" (not to be confused with
// per-dispatch varying "push constants").
executable_params.constant_count = 0;
@@ -53,7 +53,7 @@
iree_hal_executable_t* executable_ = NULL;
};
-TEST_F(CommandBufferPushConstantsTest, DispatchWithPushConstants) {
+TEST_F(CommandBufferDispatchConstantsTest, DispatchWithDispatchConstants) {
ASSERT_NO_FATAL_FAILURE(PrepareExecutable());
iree_hal_command_buffer_t* command_buffer = NULL;
@@ -130,4 +130,4 @@
} // namespace iree::hal::cts
-#endif // IREE_HAL_CTS_COMMAND_BUFFER_PUSH_CONSTANTS_TEST_H_
+#endif // IREE_HAL_CTS_COMMAND_BUFFER_DISPATCH_CONSTANTS_TEST_H_
diff --git a/runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir
similarity index 72%
rename from runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir
rename to runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir
index df041bd..6369fc4 100644
--- a/runtime/src/iree/hal/cts/testdata/command_buffer_push_constants_test.mlir
+++ b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_constants_test.mlir
@@ -1,25 +1,23 @@
// This program writes push constant values into an output buffer.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 4, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 4, bindings = [
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable.source public @executable {
- hal.executable.export public @write_push_constants ordinal(0) layout(#pipeline_layout) attributes {workgroup_size = [1 : index, 1 : index, 1 : index]} {
+ hal.executable.export public @write_constants ordinal(0) layout(#pipeline_layout) attributes {workgroup_size = [1 : index, 1 : index, 1 : index]} {
^bb0(%arg0: !hal.device):
%c1 = arith.constant 1 : index
hal.return %c1, %c1, %c1 : index, index, index
}
builtin.module {
- func.func @write_push_constants() {
+ func.func @write_constants() {
%input_0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%input_1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%input_2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : i32
%input_3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i32
- %out = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) : memref<4xi32>
+ %out = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) : memref<4xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir
index 136879d..d816c15 100644
--- a/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir
+++ b/runtime/src/iree/hal/cts/testdata/command_buffer_dispatch_test.mlir
@@ -5,11 +5,9 @@
// return %result : tensor<2xf32>
// }
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable.source public @executable {
@@ -22,8 +20,8 @@
func.func @abs() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(4) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(4) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(4) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<2xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(4) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [2], strides = [1] : !flow.dispatch.tensor<readonly:tensor<2xf32>> -> tensor<2xf32>
%3 = tensor.empty() : tensor<2xf32>
diff --git a/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir
index 43e33be..6553a44 100644
--- a/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir
+++ b/runtime/src/iree/hal/cts/testdata/executable_cache_test.mlir
@@ -5,11 +5,9 @@
// return %result : tensor<f32>
// }
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable.source public @executable {
@@ -22,8 +20,8 @@
func.func @abs() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:f32>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:f32>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) offset(%c0) : !flow.dispatch.tensor<readonly:f32>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%c0) : !flow.dispatch.tensor<writeonly:f32>
%2 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:f32> -> tensor<f32>
%3 = tensor.empty() : tensor<f32>
diff --git a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
index 0da43bb..0e60669 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
@@ -712,8 +712,8 @@
iree_hal_local_executable_t* executable;
int32_t ordinal;
- // Total number of available 4 byte push constant values in |push_constants|.
- uint16_t push_constant_count;
+ // Total number of available 4 byte push constant values in |constants|.
+ uint16_t constant_count;
// Total number of binding base pointers in |binding_ptrs| and
// |binding_lengths|. The set is packed densely based on which bindings are
@@ -721,7 +721,7 @@
uint16_t binding_count;
// Following this structure in memory there are 3 tables:
- // - const uint32_t push_constants[push_constant_count];
+ // - const uint32_t constants[constant_count];
// - void* binding_ptrs[binding_count];
// - const size_t binding_lengths[binding_count];
} iree_hal_task_cmd_dispatch_t;
@@ -742,7 +742,7 @@
.workgroup_size_x = tile_context->workgroup_size[0],
.workgroup_size_y = tile_context->workgroup_size[1],
.workgroup_size_z = tile_context->workgroup_size[2],
- .push_constant_count = cmd->push_constant_count,
+ .constant_count = cmd->constant_count,
.workgroup_count_x = tile_context->workgroup_count[0],
.workgroup_count_y = tile_context->workgroup_count[1],
.workgroup_count_z = tile_context->workgroup_count[2],
@@ -751,8 +751,8 @@
.binding_count = cmd->binding_count,
};
uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd);
- dispatch_state.push_constants = (uint32_t*)cmd_ptr;
- cmd_ptr += cmd->push_constant_count * sizeof(*dispatch_state.push_constants);
+ dispatch_state.constants = (uint32_t*)cmd_ptr;
+ cmd_ptr += cmd->constant_count * sizeof(*dispatch_state.constants);
dispatch_state.binding_ptrs = (void**)cmd_ptr;
cmd_ptr += cmd->binding_count * sizeof(*dispatch_state.binding_ptrs);
dispatch_state.binding_lengths = (size_t*)cmd_ptr;
@@ -802,7 +802,7 @@
cmd->executable = local_executable;
cmd->ordinal = entry_point;
- cmd->push_constant_count = dispatch_attrs.constant_count;
+ cmd->constant_count = dispatch_attrs.constant_count;
cmd->binding_count = dispatch_attrs.binding_count;
// TODO(benvanik): expose on API or keep fixed on executable.
@@ -835,10 +835,10 @@
constants.data_length / sizeof(uint32_t));
}
uint8_t* cmd_ptr = (uint8_t*)cmd + sizeof(*cmd);
- uint32_t* push_constants = (uint32_t*)cmd_ptr;
- memcpy(push_constants, constants.data,
- dispatch_attrs.constant_count * sizeof(*push_constants));
- cmd_ptr += dispatch_attrs.constant_count * sizeof(*push_constants);
+ uint32_t* constants_ptr = (uint32_t*)cmd_ptr;
+ memcpy(constants_ptr, constants.data,
+ dispatch_attrs.constant_count * sizeof(*constants_ptr));
+ cmd_ptr += dispatch_attrs.constant_count * sizeof(*constants_ptr);
// Produce the dense binding list based on the declared bindings used.
//
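For context on the layout described in the comments above: the dispatch command packs its constants and binding tables into the memory immediately following the command struct. A minimal standalone C sketch of that trailing-tables pattern, using simplified stand-in types (not the real iree_hal_task_cmd_dispatch_t):

```c
// Sketch of the trailing-tables layout: a small header followed in memory by
//   uint32_t constants[constant_count];
//   void*    binding_ptrs[binding_count];
//   size_t   binding_lengths[binding_count];
// Types and names are simplified stand-ins, not the real IREE structs.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
  uint32_t constant_count;
  uint32_t binding_count;
} cmd_header_t;

int main(void) {
  const uint32_t constants[2] = {7, 9};
  float buffer0[4] = {1, 2, 3, 4};

  // Record side: allocate header + the three tables in one block.
  size_t total = sizeof(cmd_header_t) + sizeof(constants) + sizeof(void*) +
                 sizeof(size_t);
  cmd_header_t* cmd = (cmd_header_t*)malloc(total);
  cmd->constant_count = 2;
  cmd->binding_count = 1;
  uint8_t* w = (uint8_t*)cmd + sizeof(*cmd);
  memcpy(w, constants, cmd->constant_count * sizeof(uint32_t));
  w += cmd->constant_count * sizeof(uint32_t);
  ((void**)w)[0] = buffer0;
  w += cmd->binding_count * sizeof(void*);
  ((size_t*)w)[0] = sizeof(buffer0);

  // Issue side: walk the same layout to rebuild the dispatch state views,
  // mirroring the cmd_ptr arithmetic in the hunk above.
  uint8_t* r = (uint8_t*)cmd + sizeof(*cmd);
  const uint32_t* cmd_constants = (const uint32_t*)r;
  r += cmd->constant_count * sizeof(uint32_t);
  void* const* binding_ptrs = (void* const*)r;
  r += cmd->binding_count * sizeof(void*);
  const size_t* binding_lengths = (const size_t*)r;

  printf("constants[0]=%u binding[0].length=%zu binding[0][0]=%f\n",
         cmd_constants[0], binding_lengths[0], ((float*)binding_ptrs[0])[0]);
  free(cmd);
  return 0;
}
```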
diff --git a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir
index fafa77b..07df2a2 100644
--- a/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir
+++ b/runtime/src/iree/hal/local/elf/testdata/elementwise_mul.mlir
@@ -19,12 +19,10 @@
// --binding=4xf32=0,0,0,0
// lhs * rhs => dst / s0b0 * s0b1 => s0b2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// A single executable source definition is allowed per translation in this mode
@@ -47,9 +45,9 @@
// exported.
builtin.module {
func.func @elementwise_mul() {
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %dst = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
diff --git a/runtime/src/iree/hal/local/executable_library.h b/runtime/src/iree/hal/local/executable_library.h
index 62b87d8..f9da209 100644
--- a/runtime/src/iree/hal/local/executable_library.h
+++ b/runtime/src/iree/hal/local/executable_library.h
@@ -279,8 +279,8 @@
uint32_t workgroup_size_y;
uint16_t workgroup_size_z;
- // Total number of available 4 byte push constant values in |push_constants|.
- uint16_t push_constant_count;
+ // Total number of available 4 byte push constant values in |constants|.
+ uint16_t constant_count;
// Total workgroup count for the dispatch. This is sourced from either the
// original dispatch call (for iree_hal_command_buffer_dispatch) or the
@@ -299,8 +299,8 @@
// used (known at compile-time).
uint8_t binding_count;
- // |push_constant_count| values.
- const uint32_t* push_constants;
+ // |constant_count| values.
+ const uint32_t* constants;
// Base pointers to each binding buffer.
void* const* binding_ptrs;
// The length of each binding in bytes, 1:1 with |binding_ptrs|.
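Rough sketch only (a stand-in struct, not the real iree_hal_executable_dispatch_state_v0_t) of how an entry point reads the renamed constants table alongside the binding tables:

```c
// binding[1][i] = binding[0][i] * constants[0] over a tiny stand-in dispatch
// state; the struct below is a simplified illustration of the renamed
// `constants`/`constant_count` fields, not the real v0 ABI definition.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
  uint16_t constant_count;    // |constant_count| 4-byte values in |constants|
  const uint32_t* constants;  // constant table
  uint8_t binding_count;      // 1:1 with binding_ptrs/binding_lengths
  void* const* binding_ptrs;
  const size_t* binding_lengths;
} dispatch_state_t;

static int dispatch_scale(const dispatch_state_t* state, uint32_t x) {
  const float* src = (const float*)state->binding_ptrs[0];
  float* dst = (float*)state->binding_ptrs[1];
  dst[x] = src[x] * (float)state->constants[0];
  return 0;
}

int main(void) {
  float src[4] = {1, 2, 3, 4};
  float dst[4] = {0};
  const uint32_t constants[1] = {3};
  void* binding_ptrs[2] = {src, dst};
  const size_t binding_lengths[2] = {sizeof(src), sizeof(dst)};
  dispatch_state_t state = {
      .constant_count = 1,
      .constants = constants,
      .binding_count = 2,
      .binding_ptrs = binding_ptrs,
      .binding_lengths = binding_lengths,
  };
  for (uint32_t x = 0; x < 4; ++x) dispatch_scale(&state, x);
  printf("%f %f %f %f\n", dst[0], dst[1], dst[2], dst[3]);
  return 0;
}
```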
diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.c b/runtime/src/iree/hal/local/executable_library_benchmark.c
index 93134fe..8988b5d 100644
--- a/runtime/src/iree/hal/local/executable_library_benchmark.c
+++ b/runtime/src/iree/hal/local/executable_library_benchmark.c
@@ -51,45 +51,44 @@
// Parsed parameters from flags.
// Used to construct the dispatch parameters for the benchmark invocation.
struct {
- int32_t push_constant_count;
+ int32_t constant_count;
union {
uint32_t ui32;
- } push_constants[IREE_HAL_EXECUTABLE_MAX_CONSTANT_COUNT];
+ } constants[IREE_HAL_EXECUTABLE_MAX_CONSTANT_COUNT];
int32_t binding_count;
iree_string_view_t bindings[IREE_HAL_EXECUTABLE_MAX_BINDING_COUNT];
} dispatch_params = {
- .push_constant_count = 0,
+ .constant_count = 0,
.binding_count = 0,
};
-static iree_status_t parse_push_constant(iree_string_view_t flag_name,
- void* storage,
- iree_string_view_t value) {
- IREE_ASSERT_LE(dispatch_params.push_constant_count + 1,
- IREE_ARRAYSIZE(dispatch_params.push_constants),
+static iree_status_t parse_constant(iree_string_view_t flag_name, void* storage,
+ iree_string_view_t value) {
+ IREE_ASSERT_LE(dispatch_params.constant_count + 1,
+ IREE_ARRAYSIZE(dispatch_params.constants),
"too many push constants");
- dispatch_params.push_constants[dispatch_params.push_constant_count++].ui32 =
+ dispatch_params.constants[dispatch_params.constant_count++].ui32 =
atoi(value.data);
return iree_ok_status();
}
-static void print_push_constant(iree_string_view_t flag_name, void* storage,
- FILE* file) {
- if (dispatch_params.push_constant_count == 0) {
+static void print_constant(iree_string_view_t flag_name, void* storage,
+ FILE* file) {
+ if (dispatch_params.constant_count == 0) {
fprintf(file, "# --%.*s=[integer value]\n", (int)flag_name.size,
flag_name.data);
return;
}
- for (int32_t i = 0; i < dispatch_params.push_constant_count; ++i) {
+ for (int32_t i = 0; i < dispatch_params.constant_count; ++i) {
fprintf(file, "--%.*s=%u", (int)flag_name.size, flag_name.data,
- dispatch_params.push_constants[i].ui32);
- if (i < dispatch_params.push_constant_count - 1) {
+ dispatch_params.constants[i].ui32);
+ if (i < dispatch_params.constant_count - 1) {
fprintf(file, "\n");
}
}
}
-IREE_FLAG_CALLBACK(parse_push_constant, print_push_constant, &dispatch_params,
- push_constant_callback,
+IREE_FLAG_CALLBACK(parse_constant, print_constant, &dispatch_params,
+ constant_callback,
"Appends a uint32_t push constant value.\n");
static iree_status_t parse_binding(iree_string_view_t flag_name, void* storage,
@@ -217,8 +216,8 @@
.workgroup_size_y = FLAG_workgroup_size_y,
.workgroup_size_z = FLAG_workgroup_size_z,
.max_concurrency = FLAG_max_concurrency,
- .push_constant_count = dispatch_params.push_constant_count,
- .push_constants = &dispatch_params.push_constants[0].ui32,
+ .constant_count = dispatch_params.constant_count,
+ .constants = &dispatch_params.constants[0].ui32,
.binding_count = dispatch_params.binding_count,
.binding_ptrs = binding_ptrs,
.binding_lengths = binding_lengths,
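For illustration, a rough standalone sketch of the same accumulate-per-occurrence flag pattern used by the renamed `--constant` flag, without the real IREE_FLAG_CALLBACK machinery (names and parsing here are illustrative only):

```c
// Each --constant=N occurrence appends one uint32_t value, mirroring
// parse_constant above; this is a self-contained illustration, not the real
// flag framework.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CONSTANT_COUNT 64

static struct {
  int32_t constant_count;
  uint32_t constants[MAX_CONSTANT_COUNT];
} dispatch_params = {0};

static void parse_constant_flag(const char* value) {
  assert(dispatch_params.constant_count < MAX_CONSTANT_COUNT &&
         "too many push constants");
  dispatch_params.constants[dispatch_params.constant_count++] =
      (uint32_t)atoi(value);
}

int main(int argc, char** argv) {
  // e.g. ./a.out --constant=1 --constant=2 --constant=3 --constant=4
  for (int i = 1; i < argc; ++i) {
    if (strncmp(argv[i], "--constant=", 11) == 0) {
      parse_constant_flag(argv[i] + 11);
    }
  }
  for (int32_t i = 0; i < dispatch_params.constant_count; ++i) {
    printf("--constant=%u\n", dispatch_params.constants[i]);
  }
  return 0;
}
```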
diff --git a/runtime/src/iree/hal/local/executable_library_benchmark.md b/runtime/src/iree/hal/local/executable_library_benchmark.md
index 1e7d59a..07bfa31 100644
--- a/runtime/src/iree/hal/local/executable_library_benchmark.md
+++ b/runtime/src/iree/hal/local/executable_library_benchmark.md
@@ -205,7 +205,7 @@
5. Look up in the IR to see the values of push constants, if required:
```mlir
- hal.command_buffer.push_constants<%cmd : !hal.command_buffer>
+ hal.command_buffer.constants<%cmd : !hal.command_buffer>
layout(%0 : !hal.pipeline_layout)
offset(0)
values(%c1, %c2, %c3, %c4) : i32, i32, i32, i32
@@ -216,8 +216,8 @@
things like this but in cases where you know the meaning you can provide values:
```
---push_constant=1
---push_constant=2
---push_constant=3
---push_constant=4
+--constant=1
+--constant=2
+--constant=3
+--constant=4
```
diff --git a/runtime/src/iree/hal/local/executable_library_demo.c b/runtime/src/iree/hal/local/executable_library_demo.c
index 300d645..bb03027 100644
--- a/runtime/src/iree/hal/local/executable_library_demo.c
+++ b/runtime/src/iree/hal/local/executable_library_demo.c
@@ -22,17 +22,17 @@
// communication between invocations must use the buffer bindings for I/O.
//
// This is a simple scalar addition:
-// binding[1] = binding[0] + push_constant[0]
+// binding[1] = binding[0] + constant[0]
static int dispatch_tile_a(
const iree_hal_executable_environment_v0_t* environment,
const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
const iree_hal_executable_workgroup_state_v0_t* workgroup_state) {
- const dispatch_tile_a_push_constants_t* push_constants =
- (const dispatch_tile_a_push_constants_t*)dispatch_state->push_constants;
+ const dispatch_tile_a_constants_t* constants =
+ (const dispatch_tile_a_constants_t*)dispatch_state->constants;
const float* src = ((const float*)dispatch_state->binding_ptrs[0]);
float* dst = ((float*)dispatch_state->binding_ptrs[1]);
const uint32_t x = workgroup_state->workgroup_id_x;
- dst[x] = src[x] + push_constants->f0;
+ dst[x] = src[x] + constants->f0;
return 0;
}
diff --git a/runtime/src/iree/hal/local/executable_library_demo.h b/runtime/src/iree/hal/local/executable_library_demo.h
index f458768..1ebcfe9 100644
--- a/runtime/src/iree/hal/local/executable_library_demo.h
+++ b/runtime/src/iree/hal/local/executable_library_demo.h
@@ -26,14 +26,14 @@
struct {
float f0;
};
-} dispatch_tile_a_push_constants_t;
+} dispatch_tile_a_constants_t;
// Returns a simple demo library with the following structure:
//
// Name: 'demo_library'
//
// [0] 'dispatch_tile_a': matmul+div
-// push constants: 1 (dispatch_tile_a_push_constants_t)
+// push constants: 1 (dispatch_tile_a_constants_t)
// bindings: 2
// [0] = R
// [1] = W
diff --git a/runtime/src/iree/hal/local/executable_library_test.c b/runtime/src/iree/hal/local/executable_library_test.c
index cb35448..b6ffc36 100644
--- a/runtime/src/iree/hal/local/executable_library_test.c
+++ b/runtime/src/iree/hal/local/executable_library_test.c
@@ -58,9 +58,9 @@
// to specify (no buffer pointer indirection) and more efficient to access
// (static struct offset address calculation, all fit in a few cache lines,
// etc). They are limited in capacity, though, so only <=64(ish) are usable.
- dispatch_tile_a_push_constants_t push_constants;
- memset(&push_constants, 0, sizeof(push_constants));
- push_constants.f0 = 5.0f;
+ dispatch_tile_a_constants_t constants;
+ memset(&constants, 0, sizeof(constants));
+ constants.f0 = 5.0f;
// Setup the two buffer bindings the entry point is expecting.
// They only need to remain valid for the duration of the invocation and all
@@ -90,8 +90,8 @@
.workgroup_size_y = 1,
.workgroup_size_z = 1,
.max_concurrency = 1,
- .push_constant_count = IREE_ARRAYSIZE(push_constants.values),
- .push_constants = push_constants.values,
+ .constant_count = IREE_ARRAYSIZE(constants.values),
+ .constants = constants.values,
.binding_count = IREE_ARRAYSIZE(binding_ptrs),
.binding_ptrs = binding_ptrs,
.binding_lengths = binding_lengths,
diff --git a/runtime/src/iree/hal/local/inline_command_buffer.c b/runtime/src/iree/hal/local/inline_command_buffer.c
index c002d22..7ea8513 100644
--- a/runtime/src/iree/hal/local/inline_command_buffer.c
+++ b/runtime/src/iree/hal/local/inline_command_buffer.c
@@ -29,7 +29,7 @@
struct {
// Cached and initialized dispatch state reused for all dispatches.
// Individual dispatches must populate the dynamically changing fields like
- // push_constant_count and binding_count.
+ // constant_count and binding_count.
iree_alignas(64) iree_hal_executable_dispatch_state_v0_t dispatch_state;
// Persistent storage for binding pointers used by dispatch_state.
void* binding_ptr_storage[IREE_HAL_EXECUTABLE_MAX_BINDING_COUNT];
@@ -397,8 +397,8 @@
(uint32_t)dispatch_attrs.constant_count,
constants.data_length / sizeof(uint32_t));
}
- dispatch_state->push_constant_count = dispatch_attrs.constant_count;
- dispatch_state->push_constants = (const uint32_t*)constants.data;
+ dispatch_state->constant_count = dispatch_attrs.constant_count;
+ dispatch_state->constants = (const uint32_t*)constants.data;
// Produce the dense binding list based on the declared bindings used.
//
diff --git a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
index bfdb1db..78663ba 100644
--- a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
+++ b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
@@ -434,9 +434,8 @@
iree_vm_buffer_t constants_buffer;
iree_vm_buffer_initialize(
IREE_VM_BUFFER_ACCESS_ORIGIN_HOST,
- iree_make_byte_span(
- (void*)dispatch_state->push_constants,
- sizeof(uint32_t) * dispatch_state->push_constant_count),
+ iree_make_byte_span((void*)dispatch_state->constants,
+ sizeof(uint32_t) * dispatch_state->constant_count),
iree_allocator_null(), &constants_buffer);
// Prepare call argument buffer. We've verified the signature on creation and
diff --git a/runtime/src/iree/modules/hal/loader/module.c b/runtime/src/iree/modules/hal/loader/module.c
index c88aaba..94baa95 100644
--- a/runtime/src/iree/modules/hal/loader/module.c
+++ b/runtime/src/iree/modules/hal/loader/module.c
@@ -222,8 +222,8 @@
};
iree_vm_abi_riiii_t params;
};
- iree_vm_size_t push_constant_count;
- const uint32_t* push_constants;
+ iree_vm_size_t constant_count;
+ const uint32_t* constants;
iree_vm_size_t binding_count;
const iree_vm_abi_rII_t* bindings;
} iree_hal_loader_dispatch_args_t;
@@ -264,13 +264,13 @@
.workgroup_size_x = 1,
.workgroup_size_y = 1,
.workgroup_size_z = 1,
- .push_constant_count = args->push_constant_count,
+ .constant_count = args->constant_count,
.workgroup_count_x = args->workgroup_x,
.workgroup_count_y = args->workgroup_y,
.workgroup_count_z = args->workgroup_z,
.max_concurrency = 1,
.binding_count = args->binding_count,
- .push_constants = args->push_constants,
+ .constants = args->constants,
.binding_ptrs = binding_ptrs,
.binding_lengths = binding_lengths,
};
@@ -302,13 +302,12 @@
.params = *(const iree_vm_abi_riiii_t*)args_storage.data,
};
if (args_ok) {
- const uint8_t* push_constants_ptr = args_storage.data + sizeof(args.params);
- args.push_constant_count = *(const iree_vm_size_t*)push_constants_ptr;
- args.push_constants =
- (const uint32_t*)(push_constants_ptr + sizeof(iree_vm_size_t));
+ const uint8_t* constants_ptr = args_storage.data + sizeof(args.params);
+ args.constant_count = *(const iree_vm_size_t*)constants_ptr;
+ args.constants = (const uint32_t*)(constants_ptr + sizeof(iree_vm_size_t));
const uint8_t* bindings_ptr =
- push_constants_ptr + sizeof(iree_vm_size_t) +
- args.push_constant_count * sizeof(args.push_constants[0]);
+ constants_ptr + sizeof(iree_vm_size_t) +
+ args.constant_count * sizeof(args.constants[0]);
args.binding_count = *(const iree_vm_size_t*)bindings_ptr;
args.bindings =
(const iree_vm_abi_rII_t*)(bindings_ptr + sizeof(iree_vm_size_t));
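A standalone sketch of the count-prefixed segment layout decoded above, using simplified stand-in types for the iree_vm_* ABI (in the real code these segments follow the fixed riiii params block):

```c
// Encode then decode [constant_count][constants...][binding_count][bindings...]
// from a flat byte buffer, mirroring the pointer walk in the hunk above.
// vm_size_t and binding_t are simplified stand-ins, not the real VM ABI types.
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t vm_size_t;

typedef struct {
  uint64_t buffer;  // stand-in for the binding buffer reference
  uint64_t offset;
  uint64_t length;
} binding_t;

int main(void) {
  uint64_t storage_words[32] = {0};  // 8-byte aligned backing storage
  uint8_t* w = (uint8_t*)storage_words;
  const uint32_t constants[2] = {7, 9};
  const binding_t bindings[1] = {{0x1, 0, 64}};
  vm_size_t constant_count = 2, binding_count = 1;
  memcpy(w, &constant_count, sizeof(constant_count)); w += sizeof(constant_count);
  memcpy(w, constants, sizeof(constants)); w += sizeof(constants);
  memcpy(w, &binding_count, sizeof(binding_count)); w += sizeof(binding_count);
  memcpy(w, bindings, sizeof(bindings));

  // Decode: read each count, then step over count * element_size bytes to
  // find the next segment.
  const uint8_t* constants_ptr = (const uint8_t*)storage_words;
  vm_size_t decoded_constant_count;
  memcpy(&decoded_constant_count, constants_ptr, sizeof(vm_size_t));
  const uint32_t* decoded_constants =
      (const uint32_t*)(constants_ptr + sizeof(vm_size_t));
  const uint8_t* bindings_ptr = constants_ptr + sizeof(vm_size_t) +
                                decoded_constant_count * sizeof(uint32_t);
  vm_size_t decoded_binding_count;
  memcpy(&decoded_binding_count, bindings_ptr, sizeof(vm_size_t));
  const binding_t* decoded_bindings =
      (const binding_t*)(bindings_ptr + sizeof(vm_size_t));

  printf("constants: %u -> %u %u; bindings: %u -> length=%llu\n",
         decoded_constant_count, decoded_constants[0], decoded_constants[1],
         decoded_binding_count,
         (unsigned long long)decoded_bindings[0].length);
  return 0;
}
```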
diff --git a/samples/custom_dispatch/cpu/embedded/example_hal.mlir b/samples/custom_dispatch/cpu/embedded/example_hal.mlir
index 5bb5299..64b246b 100644
--- a/samples/custom_dispatch/cpu/embedded/example_hal.mlir
+++ b/samples/custom_dispatch/cpu/embedded/example_hal.mlir
@@ -45,19 +45,15 @@
#x86_64_target
]> : !hal.device
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+#pipeline_layout_0 = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
-#pipeline_layout_1 = #hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+#pipeline_layout_1 = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
module @example attributes {hal.device.targets = [#cpu_target]} {
@@ -96,19 +92,7 @@
// The ordinal must be assigned by the user and unique for the executable.
// The layout defines the required bindings and push constants and can be
// thought of as the function signature.
- hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout_0) attributes {
- // Bindings are automatically inferred when possible as part of the
- // ABI but can be overridden if the user wants to use features such
- // as sparse bindings or multiple descriptor sets. To do so the
- // `hal.interface.bindings` attribute can be added to a dispatch op
- // as follows mapping tensor operands/results to the pipeline layout
- // sets/bindings:
- hal.interface.bindings = [
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>,
- #hal.interface.binding<0, 2>
- ]
- } {
+ hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout_0) {
^bb0(%device: !hal.device, %workload: index):
// This host function is used to compute the XYZ workgroup count
// dispatched at runtime. It can query the %device for capabilities
@@ -186,9 +170,9 @@
%tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x]
// Bindings are accessed by reference.
- %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
- %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
- %binding2 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(2) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding2 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(2) alignment(64) offset(%c0) : memref<?xf32>{%dim}
// Call the externally defined C function with an (almost) plain C
// calling convention (see above for details about the mess memrefs
@@ -220,8 +204,8 @@
%tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x]
// Same as above but note that we're treating %binding1 as read/write.
- %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
- %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding0 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding1 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
func.call @simple_mul_inplace_workgroup(%binding0, %binding1, %dim, %tid) : (memref<?xf32>, memref<?xf32>, index, index) -> ()
@@ -273,10 +257,6 @@
// to allow user-controlled overrides of the dispatches, custom selection
// logic based on runtime parameters, etc. In general, though, the above
// automatic selection should be used.
- //
- // Note that we don't declare the hal.interface.bindings and let them be
- // inferred - this only works when either specifying the variant that has
- // a pipeline layout defined or all variants have the same pipeline layouts.
%2 = flow.dispatch @executable::@x86_64::@simple_mul_inplace[%dim](%dim_i32, %0, %1) : (i32, tensor<?xf32>{%dim}, tensor<?xf32>{%dim}) -> %1{%dim}
// CHECK: 8xf32=96 96 96 96 96 96 96 96
diff --git a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
index b04e075..159c95f 100644
--- a/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
+++ b/samples/custom_dispatch/cpu/embedded/example_transform_spec.mlir
@@ -20,12 +20,10 @@
#x86_64_target
]>
-#pipeline_layout = #hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>
module attributes {transform.with_named_sequence} {
@@ -56,9 +54,9 @@
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%tid = affine.apply affine_map<()[s0] -> (s0 * 64)>()[%workgroup_id_x]
- %binding0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
- %binding1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
- %binding2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : memref<?xf32>{%dim}
+ %binding2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : memref<?xf32>{%dim}
func.call @simple_mul_abs_negate_workgroup(%binding0, %binding1, %binding2, %dim, %tid) : (memref<?xf32>, memref<?xf32>, memref<?xf32>, index, index) -> ()
return
diff --git a/samples/custom_dispatch/cpu/embedded/functions.c b/samples/custom_dispatch/cpu/embedded/functions.c
index a962666..495fd6d 100644
--- a/samples/custom_dispatch/cpu/embedded/functions.c
+++ b/samples/custom_dispatch/cpu/embedded/functions.c
@@ -36,12 +36,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
void simple_mul_workgroup(
@@ -64,11 +62,9 @@
// `rhs *= lhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
void simple_mul_inplace_workgroup(
@@ -89,12 +85,10 @@
// `ret = -|lhs * rhs|`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
void simple_mul_abs_negate_workgroup(
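For reference, a hedged sketch of what a 64-wide `ret = lhs * rhs` tile body could look like under this layout (two read-only bindings, one writeable binding); the simplified signature below is hypothetical, and the real functions.c carries additional memref ABI parameters:

```c
// Hypothetical simplified signature: each workgroup covers 64 contiguous
// elements starting at `tid`, matching workgroup_size = [64, 1, 1], with the
// tail workgroup clamped to `dim`.
#include <stddef.h>

void simple_mul_tile(const float* binding0, const float* binding1,
                     float* binding2, size_t dim, size_t tid) {
  size_t end = tid + 64;
  if (end > dim) end = dim;
  for (size_t i = tid; i < end; ++i) {
    binding2[i] = binding0[i] * binding1[i];
  }
}
```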
diff --git a/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c b/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c
index 88417a6..4fe4c4e 100644
--- a/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c
+++ b/samples/custom_dispatch/cpu/mlp_plugin/mlp_plugin.c
@@ -32,12 +32,10 @@
// `ret = mlp(lhs, rhs)`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
//
diff --git a/samples/custom_dispatch/cpu/plugin/standalone_plugin.c b/samples/custom_dispatch/cpu/plugin/standalone_plugin.c
index 038666f..0305fb1 100644
--- a/samples/custom_dispatch/cpu/plugin/standalone_plugin.c
+++ b/samples/custom_dispatch/cpu/plugin/standalone_plugin.c
@@ -24,12 +24,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
//
diff --git a/samples/custom_dispatch/cpu/plugin/system_plugin.c b/samples/custom_dispatch/cpu/plugin/system_plugin.c
index 816daac..86ac02f 100644
--- a/samples/custom_dispatch/cpu/plugin/system_plugin.c
+++ b/samples/custom_dispatch/cpu/plugin/system_plugin.c
@@ -42,12 +42,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// With a workgroup size of 64x1x1.
//
diff --git a/samples/custom_dispatch/cuda/kernels/README.md b/samples/custom_dispatch/cuda/kernels/README.md
index ac4e99d..51decc8 100644
--- a/samples/custom_dispatch/cuda/kernels/README.md
+++ b/samples/custom_dispatch/cuda/kernels/README.md
@@ -71,12 +71,10 @@
]
}>
hal.executable.export public @simple_mul ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {workgroup_size = [64 : index, 1 : index, 1 : index]} {
^bb0(%device: !hal.device, %workload: index):
%x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload]
diff --git a/samples/custom_dispatch/cuda/kernels/example.mlir b/samples/custom_dispatch/cuda/kernels/example.mlir
index 62e49c6..aaba1af 100644
--- a/samples/custom_dispatch/cuda/kernels/example.mlir
+++ b/samples/custom_dispatch/cuda/kernels/example.mlir
@@ -75,27 +75,14 @@
// The layout defines the required bindings and push constants and can be
// thought of as the function signature.
hal.executable.export public @simple_mul ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
// Certain backends (like CUDA) require a workgroup size (aka block
// size) to be defined ahead of time.
- workgroup_size = [64 : index, 1 : index, 1 : index],
- // Bindings are automatically inferred when possible as part of the ABI
- // but can be overridden if the user wants to use features such as sparse
- // bindings or multiple descriptor sets. To do so the
- // `hal.interface.bindings` attribute can be added to a dispatch op as
- // follows mapping tensor operands/results to the pipeline layout
- // sets/bindings:
- hal.interface.bindings = [
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>,
- #hal.interface.binding<0, 2>
- ]
+ workgroup_size = [64 : index, 1 : index, 1 : index]
} {
^bb0(%device: !hal.device, %workload: index):
// This host function is used to compute the XYZ workgroup count
@@ -110,11 +97,9 @@
// Similar to the above but in-place by using a read/write binding.
hal.executable.export public @simple_mul_inplace ordinal(1)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
workgroup_size = [64 : index, 1 : index, 1 : index]
} {
@@ -153,10 +138,6 @@
%1 = arith.addf %0, %arg1 : tensor<?xf32>
// Dispatch an in-place `rhs *= lhs` kernel.
- //
- // Note that we don't declare the hal.interface.bindings and let them be
- // inferred - this only works when either specifying the variant that has
- // a pipeline layout defined or all variants have the same pipeline layouts.
%2 = flow.dispatch @executable::@simple_mul_inplace[%dim](%dim_i32, %0, %1) : (i32, tensor<?xf32>{%dim}, tensor<?xf32>{%dim}) -> %1{%dim}
// CHECK: 8xf32=96 96 96 96 96 96 96 96
diff --git a/samples/custom_dispatch/cuda/kernels/kernels.cu b/samples/custom_dispatch/cuda/kernels/kernels.cu
index 250f676..8bca00e 100644
--- a/samples/custom_dispatch/cuda/kernels/kernels.cu
+++ b/samples/custom_dispatch/cuda/kernels/kernels.cu
@@ -36,12 +36,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// workgroup_size = [64 : index, 1 : index, 1 : index]
extern "C" __global__ void simple_mul(const float* __restrict__ binding0,
@@ -56,11 +54,9 @@
// `rhs *= lhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// workgroup_size = [64 : index, 1 : index, 1 : index]
extern "C" __global__ void simple_mul_inplace(
diff --git a/samples/custom_dispatch/hip/kernels/example.mlir b/samples/custom_dispatch/hip/kernels/example.mlir
index 6a2148c..a24a999 100644
--- a/samples/custom_dispatch/hip/kernels/example.mlir
+++ b/samples/custom_dispatch/hip/kernels/example.mlir
@@ -66,27 +66,14 @@
// The layout defines the required bindings and push constants and can be
// thought of as the function signature.
hal.executable.export public @simple_mul ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
// Certain backends (like ROCM) require a workgroup size (aka block
// size) to be defined ahead of time.
- workgroup_size = [64 : index, 1 : index, 1 : index],
- // Bindings are automatically inferred when possible as part of the ABI
- // but can be overridden if the user wants to use features such as sparse
- // bindings or multiple descriptor sets. To do so the
- // `hal.interface.bindings` attribute can be added to a dispatch op as
- // follows mapping tensor operands/results to the pipeline layout
- // sets/bindings:
- hal.interface.bindings = [
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>,
- #hal.interface.binding<0, 2>
- ]
+ workgroup_size = [64 : index, 1 : index, 1 : index]
} {
^bb0(%device: !hal.device, %workload: index):
// This host function is used to compute the XYZ workgroup count
@@ -101,11 +88,9 @@
// Similar to the above but in-place by using a read/write binding.
hal.executable.export public @simple_mul_inplace ordinal(1)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) attributes {
workgroup_size = [64 : index, 1 : index, 1 : index]
} {
diff --git a/samples/custom_dispatch/hip/kernels/kernels.cu b/samples/custom_dispatch/hip/kernels/kernels.cu
index 87e29ee..cc4762b 100644
--- a/samples/custom_dispatch/hip/kernels/kernels.cu
+++ b/samples/custom_dispatch/hip/kernels/kernels.cu
@@ -38,12 +38,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// workgroup_size = [64 : index, 1 : index, 1 : index]
extern "C" __global__ void simple_mul(const float* __restrict__ binding0,
@@ -58,11 +56,9 @@
// `rhs *= lhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
// workgroup_size = [64 : index, 1 : index, 1 : index]
extern "C" __global__ void simple_mul_inplace(
diff --git a/samples/custom_dispatch/vulkan/shaders/README.md b/samples/custom_dispatch/vulkan/shaders/README.md
index fb7406b..7e4129f 100644
--- a/samples/custom_dispatch/vulkan/shaders/README.md
+++ b/samples/custom_dispatch/vulkan/shaders/README.md
@@ -75,12 +75,10 @@
]
}>
hal.executable.export public @simple_mul ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) {
^bb0(%device: !hal.device, %workload: index):
%x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload]
diff --git a/samples/custom_dispatch/vulkan/shaders/example.mlir b/samples/custom_dispatch/vulkan/shaders/example.mlir
index d9cb5e1..17e2e45 100644
--- a/samples/custom_dispatch/vulkan/shaders/example.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example.mlir
@@ -76,25 +76,11 @@
// The layout defines the required bindings and push constants and can be
// thought of as the function signature.
hal.executable.export public @main ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
- ]>) attributes {
- // Bindings are automatically inferred when possible as part of the
- // ABI but can be overridden if the user wants to use features such as
- // sparse bindings or multiple descriptor sets. To do so the
- // `hal.interface.bindings` attribute can be added to an export op as
- // follows mapping tensor operands/results to the pipeline layout
- // sets/bindings:
- hal.interface.bindings = [
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>,
- #hal.interface.binding<0, 2>
- ]
- } {
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+ ]>) {
^bb0(%device: !hal.device, %workload: index):
// This host function is used to compute the XYZ workgroup count
// dispatched at runtime. It can query the %device for capabilities
@@ -119,11 +105,9 @@
} {
// Similar to the above but in-place by using a read/write binding.
hal.executable.export public @main ordinal(0)
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>) {
^bb0(%device: !hal.device, %workload: index):
%x = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%workload]
diff --git a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir
index 2882134..d6ef84f 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_inline.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_inline.mlir
@@ -67,24 +67,11 @@
}
// The layout defines the required bindings and push constants and can be
// thought of as the function signature.
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer, ReadOnly>,
- <2, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>)
- // Bindings are automatically inferred when possible as part of the ABI
- // but can be overridden if the user wants to use features such as sparse
- // bindings or multiple descriptor sets. To do so the
- // `hal.interface.bindings` attribute can be added to a dispatch op as
- // follows mapping tensor operands/results to the pipeline layout
- // sets/bindings:
- bindings([
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>,
- #hal.interface.binding<0, 2>
- ])
// Object files linked into the executable.
// Certain backends (today) support either wholesale definition or linking
// of partial objects for imports used by generated code. Each compilation
diff --git a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
index 8e23206..6d2cba8 100644
--- a/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
+++ b/samples/custom_dispatch/vulkan/shaders/example_transform_spec.mlir
@@ -36,15 +36,13 @@
%c1_0 = arith.constant 1 : index
hal.return %c1_0, %c1_0, %c1_0 : index, index, index
}
- layout(#hal.pipeline.layout<push_constants = 1, sets = [
- <0, bindings = [
- <0, storage_buffer, ReadOnly>,
- <1, storage_buffer>
- ]>
+ layout(#hal.pipeline.layout<constants = 1, bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
]>)
bindings([
- #hal.interface.binding<0, 0>,
- #hal.interface.binding<0, 1>
+ #hal.interface.binding<0>,
+ #hal.interface.binding<1>
])
objects({
#spirv_target ordinal(0) = [
diff --git a/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl b/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl
index 5a9c6f6..b5a563a 100644
--- a/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl
+++ b/samples/custom_dispatch/vulkan/shaders/one_workgroup_argmax_subgroup_f32.glsl
@@ -7,11 +7,9 @@
// `ret = argmax(in)`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
#version 450 core
diff --git a/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl b/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl
index ec40146..f2418a6 100644
--- a/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl
+++ b/samples/custom_dispatch/vulkan/shaders/simple_mul.glsl
@@ -7,12 +7,10 @@
// `ret = lhs * rhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer, ReadOnly>,
-// <2, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
#version 450
diff --git a/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl b/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl
index adc6d82..b24d9a2 100644
--- a/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl
+++ b/samples/custom_dispatch/vulkan/shaders/simple_mul_inplace.glsl
@@ -7,11 +7,9 @@
// `rhs *= lhs`
//
// Conforms to ABI:
-// #hal.pipeline.layout<push_constants = 1, sets = [
-// <0, bindings = [
-// <0, storage_buffer, ReadOnly>,
-// <1, storage_buffer>
-// ]>
+// #hal.pipeline.layout<constants = 1, bindings = [
+// #hal.pipeline.binding<storage_buffer, ReadOnly>,
+// #hal.pipeline.binding<storage_buffer>
// ]>
#version 450
diff --git a/samples/transform_dialect/example_module.mlir b/samples/transform_dialect/example_module.mlir
index 2fb3498..40f2fee 100644
--- a/samples/transform_dialect/example_module.mlir
+++ b/samples/transform_dialect/example_module.mlir
@@ -29,9 +29,19 @@
compute = fp32|int32, storage = b32, subgroup = none, dot = none, mma = [], subgroup_size_choices = [64, 64],
max_workgroup_sizes = [128, 128, 64], max_thread_count_per_workgroup = 128, max_workgroup_memory_bytes = 16384, max_workgroup_counts = [65535, 65535, 65535]>>
-#pipeline_layout_0 = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
-#pipeline_layout_1 = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>
-#pipeline_layout_2 = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>
+#pipeline_layout_0 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
+#pipeline_layout_1 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
+#pipeline_layout_2 = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer, ReadOnly>,
+ #hal.pipeline.binding<storage_buffer>
+]>
module attributes {
hal.device.targets = [
@@ -52,8 +62,8 @@
builtin.module {
func.func @example_module_dispatch_0_generic_80_f32() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<80xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout_0) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<80xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<80xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout_0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<80xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [80], strides = [1] : !flow.dispatch.tensor<readonly:tensor<80xf32>> -> tensor<80xf32>
%3 = tensor.empty() : tensor<80xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%2 : tensor<80xf32>) outs(%3 : tensor<80xf32>) {
@@ -77,9 +87,9 @@
builtin.module {
func.func @example_module_dispatch_1_matmul_16x16x5_f32() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x5xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5x16xf32>>
- %2 = hal.interface.binding.subspan layout(#pipeline_layout_1) set(0) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x5xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<5x16xf32>>
+ %2 = hal.interface.binding.subspan layout(#pipeline_layout_1) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 5], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x5xf32>> -> tensor<16x5xf32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [5, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<5x16xf32>> -> tensor<5x16xf32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
@@ -100,8 +110,8 @@
builtin.module {
func.func @example_module_dispatch_2_generic_16x16_f32() {
%c0 = arith.constant 0 : index
- %0 = hal.interface.binding.subspan layout(#pipeline_layout_2) set(0) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16xf32>>
- %1 = hal.interface.binding.subspan layout(#pipeline_layout_2) set(0) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<16x16xf32>>
+ %1 = hal.interface.binding.subspan layout(#pipeline_layout_2) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<16xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
%3 = tensor.empty() : tensor<16xf32>
%4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor<16x16xf32>) outs(%3 : tensor<16xf32>) {
diff --git a/tests/compiler_driver/executable_benchmarks.mlir b/tests/compiler_driver/executable_benchmarks.mlir
index b1ee0ec..f1eb326 100644
--- a/tests/compiler_driver/executable_benchmarks.mlir
+++ b/tests/compiler_driver/executable_benchmarks.mlir
@@ -18,4 +18,4 @@
// CHECK: vm.rodata private @abs_dispatch_0_vmvx_bytecode_fb
// CHECK: vm.func private @abs_dispatch_0_vmvx_bytecode_fb_abs_dispatch_0{{.+}}(%arg0: i32)
// CHECK-SAME: iree.reflection = {iree.benchmark = "dispatch"}
-// CHECK: vm.call @hal.command_buffer.dispatch
+// CHECK: vm.call.variadic @hal.command_buffer.dispatch
diff --git a/tests/compiler_driver/hal_executable.mlir b/tests/compiler_driver/hal_executable.mlir
index d3e9ed0..2e7dc2c 100644
--- a/tests/compiler_driver/hal_executable.mlir
+++ b/tests/compiler_driver/hal_executable.mlir
@@ -7,12 +7,10 @@
// push constants available and the descriptor sets and their bindings.
// Push constants are dense (0..N) while the sets/bindings are sparse and may
// contain unused or omitted entries.
-#pipeline_layout = #hal.pipeline.layout<push_constants = 2, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
// A single executable source definition is allowed per translation in this mode
@@ -39,9 +37,9 @@
// Bindings are dereferenced by their set/binding ordinal and may have a
// byte offset from the base of the descriptor. Alignment information when
// available can help code generation emit better loads/stores.
- %s0b0 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %s0b1 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) offset(%offset) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %s0b2 = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
+ %s0b0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %s0b1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) offset(%offset) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %s0b2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
// Workgroup information can be queried from the interface.
%workgroup_id_x = hal.interface.workgroup.id[0] : index
diff --git a/tests/compiler_driver/streams.mlir b/tests/compiler_driver/streams.mlir
index b1fe335..03ebbc3 100644
--- a/tests/compiler_driver/streams.mlir
+++ b/tests/compiler_driver/streams.mlir
@@ -54,7 +54,7 @@
// CHECK: vm.func private @simple_mul
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
%c4 = arith.constant 4 : index
- // CHECK: vm.call @hal.command_buffer.dispatch
+ // CHECK: vm.call.variadic @hal.command_buffer.dispatch
%ret0 = flow.dispatch @executable_0::@dispatch[%c4](%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
return %ret0 : tensor<4xf32>
}
@@ -101,7 +101,7 @@
// CHECK: vm.func private @simple_mul_inplace
func.func @simple_mul_inplace(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
%c4 = arith.constant 4 : index
- // CHECK: vm.call @hal.command_buffer.dispatch
+ // CHECK: vm.call.variadic @hal.command_buffer.dispatch
%ret0 = flow.dispatch @executable_1::@dispatch[%c4](%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> %arg0
return %ret0 : tensor<4xf32>
}
@@ -155,7 +155,7 @@
%arg0_dim0 = tensor.dim %arg0, %c0 : tensor<?xf32>
// CHECK: vm.call @hal.buffer_view.dim
%arg1_dim0 = tensor.dim %arg1, %c0 : tensor<?xf32>
- // CHECK: vm.call @hal.command_buffer.dispatch
+ // CHECK: vm.call.variadic @hal.command_buffer.dispatch
%ret0 = flow.dispatch @executable_2::@dispatch[%arg0_dim0](%arg0, %arg0_dim0, %arg1, %arg1_dim0) : (tensor<?xf32>{%arg0_dim0}, index, tensor<?xf32>{%arg1_dim0}, index) -> tensor<?xf32>{%arg0_dim0}
return %ret0 : tensor<?xf32>
}
diff --git a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt
index a307652..d5e67db 100644
--- a/tests/e2e/stablehlo_ops/CMakeLists.txt
+++ b/tests/e2e/stablehlo_ops/CMakeLists.txt
@@ -658,7 +658,7 @@
"dot.mlir"
"dot_general.mlir"
# "dynamic_slice.mlir" # TODO(#13702): update WebGPU to simplified bindings.
- "dynamic_update_slice.mlir"
+ # "dynamic_update_slice.mlir" # TODO(#13702): update WebGPU to simplified bindings.
"exponential.mlir"
"exponential_fp16.mlir"
"exponential_minus_one.mlir"
@@ -685,8 +685,8 @@
"rng_uniform.mlir"
"round.mlir"
"rsqrt.mlir"
- "scatter.mlir"
- "scatter_dynamic.mlir"
+ # "scatter.mlir" # TODO(#13702): update WebGPU to simplified bindings.
+ # "scatter_dynamic.mlir" # TODO(#13702): update WebGPU to simplified bindings.
"select.mlir"
"shape_assertion.mlir"
"sine.mlir"
diff --git a/tools/test/compile_to_phase.mlir b/tools/test/compile_to_phase.mlir
index f1861a0..db22ffa 100644
--- a/tools/test/compile_to_phase.mlir
+++ b/tools/test/compile_to_phase.mlir
@@ -41,12 +41,12 @@
// RUN: iree-compile --compile-to=vm --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=VM-PHASE
// VM-PHASE: vm.rodata private @abs_dispatch_0
-// VM-PHASE: vm.call @hal.command_buffer.dispatch
+// VM-PHASE: vm.call.variadic @hal.command_buffer.dispatch
// RUN: iree-compile --output-format=vm-asm --compile-to=end --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=END-PHASE
// RUN: iree-compile --output-format=vm-asm --iree-hal-target-device=local --iree-hal-local-target-device-backends=vmvx %s | FileCheck %s --check-prefix=END-PHASE
// END-PHASE: vm.rodata private @abs_dispatch_0
-// END-PHASE: vm.call @hal.command_buffer.dispatch
+// END-PHASE: vm.call.variadic @hal.command_buffer.dispatch
func.func @abs(%input : tensor<f32>) -> (tensor<f32>) {
%result = math.absf %input : tensor<f32>
diff --git a/tools/test/iree-benchmark-executable.mlir b/tools/test/iree-benchmark-executable.mlir
index 2950514..be8e093 100644
--- a/tools/test/iree-benchmark-executable.mlir
+++ b/tools/test/iree-benchmark-executable.mlir
@@ -34,12 +34,10 @@
// CHECK: BM_dispatch_512x1x1
// lhs * rhs => dst / s0b0 * s0b1 => s0b2
-#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [
- #hal.descriptor_set.layout<0, bindings = [
- #hal.descriptor_set.binding<0, storage_buffer>,
- #hal.descriptor_set.binding<1, storage_buffer>,
- #hal.descriptor_set.binding<2, storage_buffer>
- ]>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
]>
hal.executable.source public @executable {
hal.executable.export public @elementwise_mul ordinal(0) layout(#pipeline_layout) attributes {
@@ -52,9 +50,9 @@
}
builtin.module {
func.func @elementwise_mul() {
- %lhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %rhs = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
- %dst = hal.interface.binding.subspan layout(#pipeline_layout) set(0) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
+ %lhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %rhs = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(32) : !flow.dispatch.tensor<readonly:tensor<4xf32>>
+ %dst = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(32) : !flow.dispatch.tensor<writeonly:tensor<4xf32>>
// TODO(#16554): GPU/SPIR-V lowering doesn't handle workgroup size queries.
// %workgroup_size_x = hal.interface.workgroup.size[0] : index
%workgroup_size_x = arith.constant 1 : index