Use workgroup_count_from_slice in Stream builtins (#18924)

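`workgroup_count_from_dag_root` is slated for replacement and is not
supported by all codegen paths. Switch the builtin fill/splat executables
to `workgroup_count_from_slice`, and annotate the `%count` workload with
`flow.dispatch.workload.ordinal` in each builtin function.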
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir
index 81f683f..af2286a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i16.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_fill_i16 {
   stream.executable.export public @__builtin_fill_i16 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_fill_i16(%value: i16, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi16>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi16>
       %1 = linalg.fill ins(%value : i16) outs(%0 : tensor<?xi16>) -> tensor<?xi16>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi16> -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi16> -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count0}
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir
index 43b0829..758591f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i32.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_fill_i32 {
   stream.executable.export public @__builtin_fill_i32 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_fill_i32(%value: i32, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi32>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi32>
       %1 = linalg.fill ins(%value : i32) outs(%0 : tensor<?xi32>) -> tensor<?xi32>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count0}
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir
index 7005ded..c2c642d 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/fill_i8.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_fill_i8 {
   stream.executable.export public @__builtin_fill_i8 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_fill_i8(%value: i8, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi8>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi8>
       %1 = linalg.fill ins(%value : i8) outs(%0 : tensor<?xi8>) -> tensor<?xi8>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count0}
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i16.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i16.mlir
index a94cdf1..1397889 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i16.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i16.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_splat_i16 {
   stream.executable.export public @__builtin_splat_i16 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_splat_i16(%value: i16, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi16>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi16>
       %1 = linalg.fill ins(%value : i16) outs(%0 : tensor<?xi16>) -> tensor<?xi16>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi16> -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi16> -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count0}
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i32.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i32.mlir
index 07f3b4c..a1f19b8 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i32.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i32.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_splat_i32 {
   stream.executable.export public @__builtin_splat_i32 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_splat_i32(%value: i32, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi32>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi32>
       %1 = linalg.fill ins(%value : i32) outs(%0 : tensor<?xi32>) -> tensor<?xi32>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count0}
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i8.mlir b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i8.mlir
index 5e5f8cb..d0c6dc0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i8.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Builtins/splat_i8.mlir
@@ -9,16 +9,17 @@
 
 stream.executable private @__builtin_splat_i8 {
   stream.executable.export public @__builtin_splat_i8 workgroups(%arg0: index) -> (index, index, index) {
-    %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
+    %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg0  // x/y/z workgroup counts are computed from the workload value %arg0.
     stream.return %x, %y, %z : index, index, index
   }
   builtin.module {
     func.func @__builtin_splat_i8(%value: i8, %count: index, %out_binding: !stream.binding) {
       %c0 = arith.constant 0 : index
-      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
-      %0 = tensor.empty(%count) : tensor<?xi8>
+      %count0 = flow.dispatch.workload.ordinal %count, 0 : index  // Annotate %count as workload ordinal 0; every size below uses %count0.
+      %out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count0}
+      %0 = tensor.empty(%count0) : tensor<?xi8>
       %1 = linalg.fill ins(%value : i8) outs(%0 : tensor<?xi8>) -> tensor<?xi8>
-      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
+      flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count0], strides = [1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count0}
       return
     }
   }