[SPIRV] Add llvmpipe as known target (#24542)

Basic llvmpipe target for testing.

Signed-off-by: Tobias Fuchs <fuchs@roofline.ai>
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
index 28665a8..95fb3ae 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp
@@ -185,6 +185,24 @@
 }
 
 //===----------------------------------------------------------------------===//
+// LLVMPIPE
+//===----------------------------------------------------------------------===//
+const WgpDetails *getLLVMPIPEWgpDetails() {
+  ComputeBitwidths computeBitwdiths = allComputeBits;
+  // clang-format off
+  static const WgpDetails llvmpipeWgp = {
+      computeBitwdiths,   allStorageBits,     allSubgroupOps,  allDotProductOps,
+      /*mmaCount=*/0,     /*mmaOps=*/nullptr,
+      /*scaledMmaCount=*/0, /*scaledMmaOps=*/nullptr,
+      /*subgroupSizeChoices=*/{8, 8}, /*maxWorkgroupSizes=*/{1024, 1024, 1024},
+      /*maxThreadSize=*/1024,
+      /*maxWorkgroupMemoryBytes=*/32 * 1024, // Vulkan: maxComputeSharedMemorySize
+      /*maxWorkgroupCounts=*/{0xffff, 0xffff, 0xffff}};
+  // clang-format on
+  return &llvmpipeWgp;
+}
+
+//===----------------------------------------------------------------------===//
 // Known AMD target details
 //
 // Note: the max workgroup size is given as signed int32 max because MLIR's
@@ -1343,6 +1361,14 @@
     return createTargetAttr(*details, target,
                             /*features=*/"spirv:v1.6,cap:Shader", context);
   }
+  if (target == "llvmpipe") {
+    return createTargetAttr(
+        {getLLVMPIPEWgpDetails(), nullptr}, target,
+        /*features=*/
+        "spirv:v1.6,cap:Shader,cap:PhysicalStorageBufferAddresses,cap:"
+        "PhysicalStorageBuffer64,cap:Int64,cap:Float64",
+        context);
+  }
 
   // Go through common profiles if not hit in the above.
 
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel
index c2ef7ed..c56e7f0 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/BUILD.bazel
@@ -36,6 +36,7 @@
             "config_default_reduction.mlir",
             "config_default_softmax.mlir",
             "config_default_sub_byte_types.mlir",
+            "config_llvmpipe.mlir",
             "config_mali_conv.mlir",
             "config_mali_matmul.mlir",
             "config_nvidia_matmul.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
index ff97a66..cc088ec 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
@@ -31,6 +31,7 @@
     "config_default_reduction.mlir"
     "config_default_softmax.mlir"
     "config_default_sub_byte_types.mlir"
+    "config_llvmpipe.mlir"
     "config_mali_conv.mlir"
     "config_mali_matmul.mlir"
     "config_nvidia_matmul.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/config_llvmpipe.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_llvmpipe.mlir
new file mode 100644
index 0000000..7abbcc7
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/config_llvmpipe.mlir
@@ -0,0 +1,37 @@
+// RUN: iree-opt --split-input-file --iree-gpu-test-target=llvmpipe@vulkan --pass-pipeline='builtin.module(func.func(iree-codegen-gpu-generalize-named-ops),iree-spirv-select-lowering-strategy-pass)' %s | FileCheck %s
+
+// `llvmpipe` is a software (CPU) Vulkan implementation without vendor-specific
+// codegen, so it falls back to the default SPIR-V tile-and-vectorize path. These
+// tests check that the target resolves and produces a valid lowering strategy.
+
+func.func @matmul_1024x2048x512(%3: tensor<1024x512xf32>, %4: tensor<512x2048xf32>) -> tensor<1024x2048xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %5 = tensor.empty() : tensor<1024x2048xf32>
+  %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
+  %7 = linalg.matmul ins(%3, %4 : tensor<1024x512xf32>, tensor<512x2048xf32>) outs(%6 : tensor<1024x2048xf32>) -> tensor<1024x2048xf32>
+  return %7 : tensor<1024x2048xf32>
+}
+
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[16, 256], [8, 8], [0, 0, 4]{{\]}}>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = #iree_gpu.spirv_pipeline<BaseVectorize> workgroup_size = [32, 2, 1]>
+//      CHECK: func.func @matmul_1024x2048x512(
+// CHECK-SAME:     translation_info = #[[TRANSLATION]]
+//      CHECK:   linalg.generic
+// CHECK-SAME:       lowering_config = #[[CONFIG]]
+
+// -----
+
+func.func @conv_112x112x512(%3: tensor<1x225x225x3xf32>, %4: tensor<3x3x3x512xf32>) -> tensor<1x112x112x512xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %5 = tensor.empty() : tensor<1x112x112x512xf32>
+  %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
+  %7 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%3, %4 : tensor<1x225x225x3xf32>, tensor<3x3x3x512xf32>) outs(%6 : tensor<1x112x112x512xf32>) -> tensor<1x112x112x512xf32>
+  return %7 : tensor<1x112x112x512xf32>
+}
+
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 1, 8, 128], [1, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]{{\]}}>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = #iree_gpu.spirv_pipeline<BaseVectorize> workgroup_size = [32, 1, 1]>
+//      CHECK: func.func @conv_112x112x512(
+// CHECK-SAME:     translation_info = #[[TRANSLATION]]
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:       lowering_config = #[[CONFIG]]