[LLVMGPU] Fallback if dynamic dim found on vector distribute. (#17085)
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp index c9b7a1e..ebea4ac 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -507,14 +507,20 @@ // except the inner most m, n, and k dimensions to 1. int64_t mDim = contractionDims->m.back(); int64_t nDim = contractionDims->n.back(); + int64_t kDim = contractionDims->k.back(); + + // Dynamic dims are expected to be taken care of earlier in the pipeline. + if (ShapedType::isDynamic(bounds[mDim]) || + ShapedType::isDynamic(bounds[nDim]) || + ShapedType::isDynamic(bounds[kDim])) { + return failure(); + } // Bail out on matvec-like cases. if (bounds[mDim] == 1 || bounds[nDim] == 1) { return failure(); } - int64_t kDim = contractionDims->k.back(); - Value lhs = op.getDpsInputOperand(0)->get(); Value rhs = op.getDpsInputOperand(1)->get(); Value init = op.getDpsInitOperand(0)->get();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir index baa673e..2cab045 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute.mlir
@@ -202,3 +202,34 @@ // CHECK-LABEL: func.func @wmma_matmul_1024x1024x1024() // CHECK: linalg.matmul {{.*}}lowering_config = #[[$TILE_SIZES]] + +// ----- + +#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {mma_intrinsics = [#iree_gpu.mma_layout<MFMA_F16_16x16x16_F32>, #iree_gpu.mma_layout<MFMA_F16_32x32x8_F32>], target_arch = "gfx940"}> +module { + func.func @matmul_dynamic_dim() attributes {hal.executable.target = #executable_target_rocm_hsaco_fb} { + %c0 = arith.constant 0 : index + %c32_i64 = arith.constant 32 : i64 + %cst = arith.constant 0.000000e+00 : f32 + %0 = hal.interface.constant.load[0] : i32 + %1 = hal.interface.constant.load[1] : i32 + %2 = arith.extui %0 : i32 to i64 + %3 = arith.extui %1 : i32 to i64 + %4 = arith.shli %3, %c32_i64 : i64 + %5 = arith.ori %2, %4 : i64 + %6 = arith.index_castui %5 : i64 to index + %7 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> + %8 = flow.dispatch.workload.ordinal %6, 0 : index + %9 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<?x256xf16>>{%8} + %10 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<?x256xf32>>{%8} + %11 = flow.dispatch.tensor.load %9, offsets = [0, 0], sizes = [%8, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x256xf16>>{%8} -> tensor<?x256xf16> + %12 = flow.dispatch.tensor.load %7, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xf16>> -> tensor<256x256xf16> + %13 = tensor.empty(%8) : tensor<?x256xf32> + %14 = linalg.fill ins(%cst : f32) outs(%13 : tensor<?x256xf32>) -> tensor<?x256xf32> + %15 = linalg.matmul ins(%11, %12 : tensor<?x256xf16>, tensor<256x256xf16>) outs(%14 : 
tensor<?x256xf32>) -> tensor<?x256xf32> + flow.dispatch.tensor.store %15, %10, offsets = [0, 0], sizes = [%8, 256], strides = [1, 1] : tensor<?x256xf32> -> !flow.dispatch.tensor<writeonly:tensor<?x256xf32>>{%8} + return + } +} +// Check that the unhandled dynamic dimension prevents selecting the VectorDistribute pipeline. +// CHECK-NOT: iree_codegen.translation_info<LLVMGPUVectorDistribute