Integrate LLVM at llvm/llvm-project@7e78d89052b1

Updates LLVM usage to match
[7e78d89052b1](https://github.com/llvm/llvm-project/commit/7e78d89052b1)
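
API changes picked up by this integrate (summarized here; the hunks
below are the mechanical application):

* `linalg.generic` / `linalg.indexed_generic` drop the `args_in` and
  `args_out` attributes. Operands are now spelled with explicit
  `ins(...)` / `outs(...)` clauses, and tensor results are printed as
  `-> type` instead of a trailing `: inputTypes -> resultType` list.
* The C++ builders take separate `inputs`, `outputBuffers`, and
  `initTensors` ranges, plus `ArrayRef<AffineMap>` and
  `ArrayRef<StringRef>` instead of attribute-wrapped indexing maps and
  iterator types.
* Structured ops producing tensors now take an init tensor, threaded
  through ModelBuilder::FCBiasTanhTensors and linalg_generic_matmul.
* spirv::TargetEnvAttr::get gained vendor, device type, and device ID
  parameters.

A minimal sketch of the builder migration, assuming MLIR at
7e78d89052b1 (the helper name and operands are hypothetical; the real
call sites are in the hunks below):

  #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
  #include "mlir/Dialect/StandardOps/IR/Ops.h"
  #include "mlir/Dialect/Utils/StructuredOpsUtils.h"

  using namespace mlir;

  // Hypothetical helper: elementwise add on memrefs via linalg.generic.
  static linalg::GenericOp createEltwiseAdd(OpBuilder &b, Location loc,
                                            Value lhs, Value rhs, Value out,
                                            ArrayRef<AffineMap> maps) {
    // All loops are parallel for a pointwise op.
    SmallVector<StringRef, 4> iters(maps.front().getNumDims(),
                                    getParallelIteratorTypeName());
    // Before: b.create<linalg::GenericOp>(loc, resultTypes,
    //     {lhs, rhs, out}, /*args_in=*/2, /*args_out=*/1, maps, iters,
    //     /*doc=*/nullptr, /*library_call=*/nullptr, ...);
    return b.create<linalg::GenericOp>(
        loc, /*resultTensorTypes=*/ArrayRef<Type>{},
        /*inputs=*/ValueRange{lhs, rhs},
        /*outputBuffers=*/ValueRange{out},
        /*initTensors=*/ValueRange{}, maps, iters,
        [](OpBuilder &nested, Location nestedLoc, ValueRange args) {
          Value sum = nested.create<AddFOp>(nestedLoc, args[0], args[1]);
          nested.create<linalg::YieldOp>(nestedLoc, sum);
        });
  }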

PiperOrigin-RevId: 333090785
diff --git a/SUBMODULE_VERSIONS b/SUBMODULE_VERSIONS
index 47019a7..c2c671b 100644
--- a/SUBMODULE_VERSIONS
+++ b/SUBMODULE_VERSIONS
@@ -4,7 +4,7 @@
 a5d9d0f7d368054fd1691aedf1db4116efcc233e third_party/flatbuffers
 4fb0ff7069bd88ee85902f4d0bb62794e5f6d021 third_party/flatcc
 f2fb48c3b3d79a75a88a99fba6576b25d42ec528 third_party/googletest
-93fd30bac3345fea4f5beba3241f1ef4f2f5f419 third_party/llvm-project
+7e78d89052b15f32ea56f018698194c7c9627152 third_party/llvm-project
 17b12a4481daa150e2d1ea3ada086b551b856707 third_party/marl
 08c41f61f4f5f17728c9e0f48640eaa32588b63b third_party/mlir-emitc
 d8c7ee00a687ac369e62e2032514a93a9b413502 third_party/pybind11
diff --git a/experimental/ModelBuilder/ModelBuilder.cpp b/experimental/ModelBuilder/ModelBuilder.cpp
index 51169b6..d4151b2 100644
--- a/experimental/ModelBuilder/ModelBuilder.cpp
+++ b/experimental/ModelBuilder/ModelBuilder.cpp
@@ -104,7 +104,9 @@
        spirv::Extension::SPV_KHR_8bit_storage,
        spirv::Extension::SPV_KHR_16bit_storage},
       context);
-  return spirv::TargetEnvAttr::get(triple,
+  return spirv::TargetEnvAttr::get(triple, spirv::Vendor::Unknown,
+                                   spirv::DeviceType::Unknown,
+                                   spirv::TargetEnvAttr::kUnknownDeviceID,
                                    spirv::getDefaultResourceLimits(context));
 }
 
@@ -193,12 +195,13 @@
 
 Value ModelBuilder::FCBiasTanhTensors(RankedTensorType outputTensorType,
                                       std::array<Value, 2> fcArgs,
-                                      Value biasValueArg) {
+                                      Value fcInitTensor, Value biasValueArg) {
   //==========================================================================//
   // Layer 1: FC
   //==========================================================================//
   Value I = fcArgs[0], W = fcArgs[1];
-  Value O2 = linalg_generic_matmul(I, W, outputTensorType)->getResult(0);
+  Value O2 =
+      linalg_generic_matmul(I, W, fcInitTensor, outputTensorType)->getResult(0);
 
   //==========================================================================//
   // Layer 2: BiasAddTanh Block
diff --git a/experimental/ModelBuilder/ModelBuilder.h b/experimental/ModelBuilder/ModelBuilder.h
index 35ad4bd..fbb72cb 100644
--- a/experimental/ModelBuilder/ModelBuilder.h
+++ b/experimental/ModelBuilder/ModelBuilder.h
@@ -196,7 +196,7 @@
   // Version with a RankedTensor result.
   static Value FCBiasTanhTensors(RankedTensorType outputTensorType,
                                  std::array<Value, 2> fcArgs,
-                                 Value biasValueArg);
+                                 Value fcInitTensor, Value biasValueArg);
 
   // Build the MLIR representation for:
   //   `0.5f * tanh(0.5f * (x + bias)) + 0.5f`
diff --git a/experimental/ModelBuilder/test/TestMNISTJIT.cpp b/experimental/ModelBuilder/test/TestMNISTJIT.cpp
index 49676e2..3c006d3 100644
--- a/experimental/ModelBuilder/test/TestMNISTJIT.cpp
+++ b/experimental/ModelBuilder/test/TestMNISTJIT.cpp
@@ -111,35 +111,41 @@
   auto h1WeightsType = modelBuilder.getRankedTensorType({W0, W1}, f32);
   auto h2WeightsType = modelBuilder.getRankedTensorType({W1, W2}, f32);
   auto h3WeightsType = modelBuilder.getRankedTensorType({W2, W3}, f32);
+  auto b1InitType = modelBuilder.getRankedTensorType({B, W1}, f32);
+  auto b2InitType = modelBuilder.getRankedTensorType({B, W2}, f32);
+  auto b3InitType = modelBuilder.getRankedTensorType({B, W3}, f32);
   auto bias1Type = modelBuilder.getRankedTensorType({W1}, f32);
   auto bias2Type = modelBuilder.getRankedTensorType({W2}, f32);
   auto bias3Type = modelBuilder.getRankedTensorType({W3}, f32);
   auto outputType = modelBuilder.getRankedTensorType({B, W3}, f32);
   auto func = modelBuilder.makeFunction(
       funcName, {outputType},
-      {inputType, h1WeightsType, h2WeightsType, h3WeightsType, bias1Type,
-       bias2Type, bias3Type});
+      {inputType, h1WeightsType, h2WeightsType, h3WeightsType, b1InitType,
+       b2InitType, b3InitType, bias1Type, bias2Type, bias3Type});
   Value input = func.getArgument(0);
   Value h1Weights = func.getArgument(1);
   Value h2Weights = func.getArgument(2);
   Value h3Weights = func.getArgument(3);
-  Value bias1 = func.getArgument(4);
-  Value bias2 = func.getArgument(5);
-  Value bias3 = func.getArgument(6);
+  Value b1Init = func.getArgument(4);
+  Value b2Init = func.getArgument(5);
+  Value b3Init = func.getArgument(6);
+  Value bias1 = func.getArgument(7);
+  Value bias2 = func.getArgument(8);
+  Value bias3 = func.getArgument(9);
 
   // 2. Fill the body (3 blocks of FCBiasTanh), alloc everything manually atm.
   OpBuilder b(&func.getBody());
   ScopedContext scope(b, func.getLoc());
 
   auto outputBlock1Type = modelBuilder.getRankedTensorType({B, W1}, f32);
-  auto outputBlock1 = modelBuilder.FCBiasTanhTensors(outputBlock1Type,
-                                                     {input, h1Weights}, bias1);
+  auto outputBlock1 = modelBuilder.FCBiasTanhTensors(
+      outputBlock1Type, {input, h1Weights}, b1Init, bias1);
   auto outputBlock2Type = modelBuilder.getRankedTensorType({B, W2}, f32);
   auto outputBlock2 = modelBuilder.FCBiasTanhTensors(
-      outputBlock2Type, {outputBlock1, h2Weights}, bias2);
+      outputBlock2Type, {outputBlock1, h2Weights}, b2Init, bias2);
   auto outputBlock3Type = outputType;
   auto outputBlock3 = modelBuilder.FCBiasTanhTensors(
-      outputBlock3Type, {outputBlock2, h3Weights}, bias3);
+      outputBlock3Type, {outputBlock2, h3Weights}, b3Init, bias3);
   // Vexing parses.
   (std_ret(outputBlock3));
 }
@@ -199,6 +205,7 @@
       *static_cast<StridedMemRefType<float, 2> *>(outputBuffer->descriptor));
 }
 
+// clang-format off
 // For now, we can only dump the IR for `test_mnist_jit_tensors`.
 // Once buffer allocation is implemented we will only have an execution test.
 //
@@ -206,39 +213,47 @@
 //
 // Matmul
 // CHECK: linalg.generic
-// CHECK:   tensor<?x784xf32>, tensor<784x256xf32> -> tensor<?x256xf32>
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x784xf32>, tensor<784x256xf32>)
+// CHECK-SAME:   init(%{{[a-z0-9]*}} : tensor<?x256xf32>)
+// CHECK:   -> tensor<?x256xf32>
 //
 // Pointwise
 // CHECK: linalg.generic
+// CHECK-SAME:  tensor<?x256xf32>, tensor<256xf32>
 // CHECK:   addf
 // CHECK:   mulf
 // CHECK:   tanh
 // CHECK:   mulf
 // CHECK:   addf
 // CHECK:   addf
-// CHECK:   tensor<?x256xf32>, tensor<256xf32> -> tensor<?x256xf32>
+// CHECK:   -> tensor<?x256xf32>
 //
 // Matmul
 // CHECK: linalg.generic
-// CHECK:   tensor<?x256xf32>, tensor<256x256xf32> -> tensor<?x256xf32>
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x256xf32>, tensor<256x256xf32>)
+// CHECK-SAME:   init(%{{[a-z0-9]*}} : tensor<?x256xf32>)
+// CHECK:   -> tensor<?x256xf32>
 //
 // Pointwise
 // CHECK: linalg.generic
-// CHECK:   tensor<?x256xf32>, tensor<256xf32> -> tensor<?x256xf32>
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x256xf32>, tensor<256xf32>)
+// CHECK:   -> tensor<?x256xf32>
 //
 // Matmul
 // CHECK: linalg.generic
-// CHECK:   tensor<?x256xf32>, tensor<256x10xf32> -> tensor<?x10xf32>
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x256xf32>, tensor<256x10xf32>)
+// CHECK-SAME:   init(%{{[a-z0-9]*}} : tensor<?x10xf32>)
+// CHECK:   -> tensor<?x10xf32>
 //
 // Pointwise
 // CHECK: linalg.generic
-// CHECK:   tensor<?x10xf32>, tensor<10xf32> -> tensor<?x10xf32>
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x10xf32>, tensor<10xf32>)
+// CHECK:   -> tensor<?x10xf32>
 // CHECK:   return {{.*}} : tensor<?x10xf32>
 
 // Execution test for `test_mnist_jit_buffers`.
 //
 // CHECK: Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [3, 10]
 // CHECK-SAME: strides = [10, 1] data =
-// clang-format off
 // CHECK-COUNT-3: {{.*[[:space:]].*}}[3177.93,   3177.93,   3177.93,   3177.93,   3177.93,   3177.93,   3177.93,   3177.93,   3177.93,   3177.93]
 // clang-format on
diff --git a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
index 37ed038..0850df2 100644
--- a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
+++ b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
@@ -74,12 +74,12 @@
-/// Returns an ArrayAttr that contains `nLoops` attributes. All the attributes
-/// are "parallel" except the last `nReduction` elements, where are "reduction"
-/// attributes.
+/// Returns a vector of `nLoops` iterator type names. All the names are
+/// "parallel" except the last `nReduction` elements, which are "reduction"
+/// iterators.
-static ArrayAttr getParallelAndReductionIterAttrs(Builder b, unsigned nLoops,
-                                                  unsigned nReduction) {
-  SmallVector<Attribute, 3> attrs(
-      nLoops - nReduction, b.getStringAttr(getParallelIteratorTypeName()));
-  attrs.append(nReduction, b.getStringAttr(getReductionIteratorTypeName()));
-  return b.getArrayAttr(attrs);
+static SmallVector<StringRef, 3> getParallelAndReductionIterators(
+    unsigned nLoops, unsigned nReduction) {
+  SmallVector<StringRef, 3> res(nLoops - nReduction,
+                                getParallelIteratorTypeName());
+  res.append(nReduction, getReductionIteratorTypeName());
+  return res;
 }
 
 /// Emits linalg.fill op to fill the given `buffer` with zero value.
@@ -522,16 +522,15 @@
         nloops, /*symbolCount=*/0, outputExprs, rewriter.getContext()));
 
     Location loc = op.getLoc();
-    SmallVector<Value, 4> linalgOpArgs = {inputBuffers[0], inputBuffers[1],
-                                          resultBuffers[0]};
 
     SmallVector<StringRef, 3> loopAttributeTypes(spatialDims + 3, "parallel");
     loopAttributeTypes.append(spatialDims, "reduction");
     rewriter.create<linalg::GenericOp>(
-        loc, ArrayRef<Type>{}, linalgOpArgs,
-        2,  // args_in
-        1,  // args_out
-        indexingMaps, loopAttributeTypes,
+        loc,
+        /*resultTensorTypes=*/ArrayRef<Type>{},
+        /*inputs=*/inputBuffers,
+        /*outputs=*/resultBuffers, /*initTensors=*/ValueRange{}, indexingMaps,
+        loopAttributeTypes,
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
           Value mul = nestedBuilder.create<MulFOp>(nestedLoc, args[0], args[1]);
           Value add = nestedBuilder.create<AddFOp>(nestedLoc, mul, args[2]);
@@ -572,7 +571,7 @@
   int dim = op.dimension();
   int rank = inputBuffers[0].getType().cast<ShapedType>().getRank();
 
-  SmallVector<Attribute, 2> indexingMaps;
+  SmallVector<AffineMap, 2> indexingMaps;
   SmallVector<AffineExpr, 4> exprs;
   exprs.resize(rank);
   for (int i = 0, j = 0, e = rank; i < e; ++i) {
@@ -582,33 +581,31 @@
   int nloops = rank + inputBuffers.size();
   for (int i = 0, e = inputBuffers.size(); i < e; ++i) {
     exprs[dim] = rewriter.getAffineDimExpr(rank + i);
-    indexingMaps.emplace_back(AffineMapAttr::get(AffineMap::get(
-        nloops, /*symbolCount=*/0, exprs, rewriter.getContext())));
+    indexingMaps.emplace_back(AffineMap::get(nloops, /*symbolCount=*/0, exprs,
+                                             rewriter.getContext()));
   }
   exprs[dim] = rewriter.getAffineDimExpr(rank - 1);
-  indexingMaps.emplace_back(AffineMapAttr::get(
-      AffineMap::get(nloops, /*symbolCount=*/0, exprs, rewriter.getContext())));
+  indexingMaps.emplace_back(
+      AffineMap::get(nloops, /*symbolCount=*/0, exprs, rewriter.getContext()));
 
   SmallVector<Type, 4> bodyArgTypes, opResultTypes;
   // Also make the dimension to be concatenated not a parallel loop.
   int nonParallelLoops = nloops - rank + 1;
-  SmallVector<Value, 2> linalgOpArgs(inputBuffers.begin(), inputBuffers.end());
-  linalgOpArgs.push_back(resultBuffers[0]);
   auto linalgOp = rewriter.create<linalg::IndexedGenericOp>(
-      loc, opResultTypes, linalgOpArgs,
-      rewriter.getI64IntegerAttr(inputBuffers.size()),  // args_in
-      rewriter.getI64IntegerAttr(1),                    // args_out
-      rewriter.getArrayAttr(indexingMaps),
-      getParallelAndReductionIterAttrs(rewriter, nloops, nonParallelLoops),
-      /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
+      loc, /*resultTensorTypes=*/opResultTypes, /*inputs=*/inputBuffers,
+      /*outputBuffers=*/resultBuffers,
+      /*initTensors=*/ValueRange{}, indexingMaps,
+      getParallelAndReductionIterators(nloops, nonParallelLoops));
 
   // Add a block to the region.
   auto *region = &linalgOp.region();
   auto *block = rewriter.createBlock(region, region->end());
   bodyArgTypes.append(nloops, rewriter.getIndexType());
   auto resultType = op.getResult().getType().dyn_cast<ShapedType>();
-  bodyArgTypes.append(linalgOpArgs.size(), resultType.getElementType());
+  bodyArgTypes.append(linalgOp.getNumInputsAndOutputBuffers(),
+                      resultType.getElementType());
   block->addArguments(bodyArgTypes);
+  OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointToEnd(block);
 
   Value accBound = rewriter.create<ConstantIndexOp>(loc, 0);
@@ -788,29 +785,21 @@
   Location loc = op.getLoc();
   Value output = op.getResult();
   int rank = output.getType().cast<ShapedType>().getRank();
-  SmallVector<Attribute, 2> indexingMaps;
+  SmallVector<AffineMap, 2> indexingMaps;
   SmallVector<AffineExpr, 4> exprs;
-  for (int i = 0; i < batch; ++i) {
-    exprs.push_back(rewriter.getAffineDimExpr(i));
-  }
-  for (int i = 0, e = nIndices - batch; i < e; ++i) {
+  for (int i = 0; i < batch; ++i) exprs.push_back(rewriter.getAffineDimExpr(i));
+  for (int i = 0, e = nIndices - batch; i < e; ++i)
     exprs.push_back(rewriter.getAffineDimExpr(axis + i));
-  }
-  indexingMaps.emplace_back(AffineMapAttr::get(
-      AffineMap::get(rank, /*symbolCount=*/0, exprs, rewriter.getContext())));
   indexingMaps.emplace_back(
-      AffineMapAttr::get(rewriter.getMultiDimIdentityMap(rank)));
+      AffineMap::get(rank, /*symbolCount=*/0, exprs, rewriter.getContext()));
+  indexingMaps.emplace_back(rewriter.getMultiDimIdentityMap(rank));
+  auto linalgOp = rewriter.create<linalg::IndexedGenericOp>(
+      loc, /*resultTensorTypes=*/ArrayRef<Type>{}, /*inputs=*/adaptor.index(),
+      /*outputBuffers=*/resultBuffers, /*initTensors=*/ValueRange{},
+      indexingMaps, getParallelAndReductionIterators(rank, /*nReduction=*/0));
 
   SmallVector<Type, 4> bodyArgTypes, opResultTypes;
   SmallVector<Value, 2> linalgOpArgs = {adaptor.index(), resultBuffers[0]};
-  auto linalgOp = rewriter.create<linalg::IndexedGenericOp>(
-      loc, opResultTypes, linalgOpArgs,
-      rewriter.getI64IntegerAttr(1),  // args_in
-      rewriter.getI64IntegerAttr(1),  // args_out
-      rewriter.getArrayAttr(indexingMaps),
-      getParallelAndReductionIterAttrs(rewriter, rank, /*nReduction=*/0),
-      /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
-
   // Add a block to the region.
   auto *region = &linalgOp.region();
   auto *block = rewriter.createBlock(region, region->end());
@@ -820,6 +809,7 @@
         blockArgs.getType().cast<ShapedType>().getElementType());
   }
   block->addArguments(bodyArgTypes);
+  OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointToEnd(block);
 
   SmallVector<Value, 4> indices;
@@ -835,7 +825,6 @@
 
   Value res = rewriter.create<LoadOp>(loc, adaptor.input(), indices);
   rewriter.create<linalg::YieldOp>(loc, res);
-
   return success();
 }
 
@@ -1085,12 +1074,12 @@
   // initial value and dst, respectively.
   // Transpose `src` to make the reduction loops be the innermost, because it's
   // easier to fully utilize processors.
-  SmallVector<Attribute, 3> indexingMaps;
-  indexingMaps.emplace_back(AffineMapAttr::get(getTransposeMapForReduction(
-      rewriter.getContext(), nInputRank, reductionDims)));
+  SmallVector<AffineMap, 3> indexingMaps;
+  indexingMaps.emplace_back(getTransposeMapForReduction(
+      rewriter.getContext(), nInputRank, reductionDims));
   if (!initConstVal) {
-    indexingMaps.emplace_back(AffineMapAttr::get(
-        AffineMap::get(nInputRank, /*symbolCount=*/0, rewriter.getContext())));
+    indexingMaps.emplace_back(
+        AffineMap::get(nInputRank, /*symbolCount=*/0, rewriter.getContext()));
   }
   // The indexing map of `dst` should drop the reduction loops. Since the
   // reduction loops now are all in the innermost, drops `reductionDims.size()`
@@ -1100,30 +1089,26 @@
   for (int i = 0, e = nInputRank - reductionDims.size(); i < e; ++i) {
     exprs.push_back(rewriter.getAffineDimExpr(i));
   }
-  indexingMaps.emplace_back(AffineMapAttr::get(
+  indexingMaps.emplace_back(
       exprs.empty()
           ? AffineMap::get(nInputRank, /*symbolCount=*/0, rewriter.getContext())
           : AffineMap::get(nInputRank, /*symbolCount=*/0, exprs,
-                           rewriter.getContext())));
+                           rewriter.getContext()));
 
   SmallVector<Type, 2> resultTypes = {};
-  SmallVector<Value, 2> linalgOpArgs = {inputBuffers[0]};
+  SmallVector<Value, 2> inputs = {inputBuffers[0]};
   if (!initConstVal) {
-    linalgOpArgs.push_back(inputBuffers[1]);
+    inputs.push_back(inputBuffers[1]);
   }
-  linalgOpArgs.push_back(resultBuffers[0]);
   if (failed(zeroFillBuffer(loc, resultBuffers[0], rewriter))) {
     rewriter.notifyMatchFailure(reduceOp, "failed to zero fill result buffer");
     return failure();
   }
   auto linalgOp = rewriter.create<linalg::IndexedGenericOp>(
-      loc, resultTypes, linalgOpArgs,
-      rewriter.getI64IntegerAttr(linalgOpArgs.size() - 1),  // args_in
-      rewriter.getI64IntegerAttr(1),                        // args_out
-      rewriter.getArrayAttr(indexingMaps),
-      getParallelAndReductionIterAttrs(rewriter, nInputRank,
-                                       reductionDims.size()),
-      /*doc=*/nullptr, /*library_call=*/nullptr, /*symbol_source=*/nullptr);
+      loc, /*resultTensorTypes=*/resultTypes, /*inputs=*/inputs,
+      /*outputBuffers=*/resultBuffers, /*initTensors=*/ValueRange{},
+      indexingMaps,
+      getParallelAndReductionIterators(nInputRank, reductionDims.size()));
 
   linalgOp.region().takeBody(reduceOp.body());
   {
@@ -1196,11 +1181,12 @@
     // generic/indexed_generic op, but with memrefs.
     // TODO(ravishankarm): Figure out how to do this inplace.
     auto linalgBufferOp = rewriter.template create<LinalgOpTy>(
-        op.getLoc(), ArrayRef<Type>(), opArgs, op.args_in(), op.args_out(),
-        op.indexing_maps(), op.iterator_types(),
-        /*doc=*/nullptr,
-        /*library_call=*/nullptr,
-        /*symbol_source=*/nullptr);
+        op.getLoc(), inputBuffers, resultBuffers,
+        llvm::to_vector<4>(
+            op.indexing_maps().template getAsValueRange<AffineMapAttr>()),
+        llvm::to_vector<4>(
+            op.iterator_types().template getAsValueRange<StringAttr>()));
+
     // Move the region from the replaced op into the new op.
     unsigned numTensorOperands = op.getNumOperands();
     // indexed_generic op has arguments for each index. In the case of generic
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir b/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir
index 760983b..7f637f1 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir
@@ -8,16 +8,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xf32>, tensor<{{.+}}xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: f32, %[[OPERAND_IN2:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = addf %[[OPERAND_IN1]], %[[OPERAND_IN2]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<{{.+}}xf32>, tensor<{{.+}}xf32> -> tensor<{{.+}}xf32>
+// CHECK-NEXT: } -> tensor<{{.+}}xf32>
 
 // -----
 
@@ -29,16 +27,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xi32>, tensor<{{.+}}xi32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: i32, %[[OPERAND_IN2:.+]]: i32):
 // CHECK-NEXT:   %[[RESULT:.+]] = addi %[[OPERAND_IN1]], %[[OPERAND_IN2]] : i32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : i32
-// CHECK-NEXT: }: tensor<{{.+}}xi32>, tensor<{{.+}}xi32> -> tensor<{{.+}}xi32>
+// CHECK-NEXT: } -> tensor<{{.+}}xi32>
 
 // -----
 
@@ -51,16 +47,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xf32>, tensor<{{.+}}xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: f32, %[[OPERAND_IN2:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = subf %[[OPERAND_IN1]], %[[OPERAND_IN2]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<{{.+}}xf32>, tensor<{{.+}}xf32> -> tensor<{{.+}}xf32>
+// CHECK-NEXT: } -> tensor<{{.+}}xf32>
 
 // -----
 
@@ -72,16 +66,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xi32>, tensor<{{.+}}xi32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: i32, %[[OPERAND_IN2:.+]]: i32):
 // CHECK-NEXT:   %[[RESULT:.+]] = subi %[[OPERAND_IN1]], %[[OPERAND_IN2]] : i32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : i32
-// CHECK-NEXT: }: tensor<{{.+}}xi32>, tensor<{{.+}}xi32> -> tensor<{{.+}}xi32>
+// CHECK-NEXT: } -> tensor<{{.+}}xi32>
 
 // -----
 
@@ -93,16 +85,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xf32>, tensor<{{.+}}xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: f32, %[[OPERAND_IN2:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = mulf %[[OPERAND_IN1]], %[[OPERAND_IN2]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<{{.+}}xf32>, tensor<{{.+}}xf32> -> tensor<{{.+}}xf32>
+// CHECK-NEXT: } -> tensor<{{.+}}xf32>
 
 // -----
 
@@ -114,16 +104,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xi32>, tensor<{{.+}}xi32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: i32, %[[OPERAND_IN2:.+]]: i32):
 // CHECK-NEXT:   %[[RESULT:.+]] = muli %[[OPERAND_IN1]], %[[OPERAND_IN2]] : i32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : i32
-// CHECK-NEXT: }: tensor<{{.+}}xi32>, tensor<{{.+}}xi32> -> tensor<{{.+}}xi32>
+// CHECK-NEXT: } -> tensor<{{.+}}xi32>
 
 // -----
 
@@ -135,16 +123,14 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xf32>, tensor<{{.+}}xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: f32, %[[OPERAND_IN2:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = divf %[[OPERAND_IN1]], %[[OPERAND_IN2]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<{{.+}}xf32>, tensor<{{.+}}xf32> -> tensor<{{.+}}xf32>
+// CHECK-NEXT: } -> tensor<{{.+}}xf32>
 
 // -----
 
@@ -156,13 +142,11 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 2
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]], #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}}, %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<{{.+}}xi32>, tensor<{{.+}}xi32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN1:.+]]: i32, %[[OPERAND_IN2:.+]]: i32):
 // CHECK-NEXT:   %[[RESULT:.+]] = divi_signed %[[OPERAND_IN1]], %[[OPERAND_IN2]] : i32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : i32
-// CHECK-NEXT: }: tensor<{{.+}}xi32>, tensor<{{.+}}xi32> -> tensor<{{.+}}xi32>
+// CHECK-NEXT: } -> tensor<{{.+}}xi32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir b/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir
index af36509..ed9b158 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir
@@ -6,11 +6,11 @@
   //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
   //      CHECK: @concatenate
   //      CHECK: linalg.indexed_generic {
-  // CHECK-SAME:   args_in = 2
-  // CHECK-SAME:   args_out = 1
   // CHECK-SAME:   indexing_maps
   // CHECK-SAME:   #[[MAP0]], #[[MAP1]], #[[MAP2]]
   // CHECK-SAME:   iterator_types = ["parallel", "reduction", "reduction", "reduction"]
+  // CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} :
+  // CHECK-SAME:   outs(%{{[a-z0-9]*}} :
   func @concatenate() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<2x2xi32>
@@ -35,11 +35,11 @@
 //  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d0)>
 //      CHECK: @concatenate
 //      CHECK: linalg.indexed_generic {
-// CHECK-SAME:   args_in = 2
-// CHECK-SAME:   args_out = 1
 // CHECK-SAME:   indexing_maps
 // CHECK-SAME:   #[[MAP0]], #[[MAP1]], #[[MAP2]]
 // CHECK-SAME:   iterator_types = ["parallel", "reduction", "reduction", "reduction"]
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} :
+// CHECK-SAME:   outs(%{{[a-z0-9]*}} :
 module {
   func @concatenate() {
     %c0 = constant 0 : index
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir b/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir
index 2a065e8..5ee4a70 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir
@@ -74,7 +74,8 @@
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d4, d3)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4 * 3 + d3)>
-// CHECK: linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]
+// CHECK: linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]
+// CHECK-SAME:   ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
+// CHECK-SAME:   outs(%{{[a-z0-9]*}} : memref<2x3x4x6xf32>)
 // CHECK: mulf
 // CHECK: addf
-// CHECK: memref<2x4x5x2xf32>, memref<2x2x2x3xf32>, memref<2x3x4x6xf32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir b/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir
index fb0c143..75259d2 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir
@@ -8,13 +8,11 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 1
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}} : tensor<?x?xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = exp %[[OPERAND_IN]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<?x?xf32> -> tensor<?x?xf32>
+// CHECK-NEXT: } -> tensor<?x?xf32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir b/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir
index bcbb8de..7d0feed 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir
@@ -7,13 +7,11 @@
   return
 }
 // CHECK: linalg.generic {
-// CHECK-SAME: args_in = 1
-// CHECK-SAME: args_out = 1
 // CHECK-SAME: indexing_maps
 // CHECK-SAME: #[[MAP0]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: %{{.+}} {
+// CHECK-SAME:   ins(%{{[a-z0-9]*}} : tensor<2x2xf32>)
 // CHECK-NEXT: ^{{.+}}(%[[OPERAND_IN:.+]]: f32):
 // CHECK-NEXT:   %[[RESULT:.+]] = exp %[[OPERAND_IN]] : f32
 // CHECK-NEXT:   linalg.yield %[[RESULT]] : f32
-// CHECK-NEXT: }: tensor<2x2xf32> -> tensor<2x2xf32>
+// CHECK-NEXT: } -> tensor<2x2xf32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/fusion.mlir b/iree/compiler/Conversion/HLOToLinalg/test/fusion.mlir
index bb090b8..7e85231 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/fusion.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/fusion.mlir
@@ -6,12 +6,12 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<4x25xf32>
     %1 = linalg.tensor_reshape %0 [affine_map<(d0, d1) -> (d0, d1)>] : tensor<4x25xf32> into tensor<100xf32>
-    %2 = linalg.generic {
-           args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0],
-           iterator_types = ["parallel"]} %1 {
+    %2 = linalg.generic {indexing_maps = [#map0, #map0],
+           iterator_types = ["parallel"]}
+    ins(%1 : tensor<100xf32>) {
     ^bb0(%arg0: f32):
        linalg.yield %arg0 : f32
-    } : tensor<100xf32> -> tensor<100xf32>
+    } -> tensor<100xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<100xf32>
     return
   }
@@ -59,16 +59,18 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<10xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<5xf32>
     %2 = linalg.tensor_reshape %0 [#map0] : tensor<10xf32> into tensor<1x2x5xf32>
-    %3 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel"]} %1 {
+    %3 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel"]}
+    ins(%1 : tensor<5xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       linalg.yield %arg0 : f32
-    }: tensor<5xf32> -> tensor<2x5xf32>
+    } -> tensor<2x5xf32>
     %4 = linalg.tensor_reshape %2 [#map3, #map4] : tensor<1x2x5xf32> into tensor<2x5xf32>
-    %5 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel", "parallel"]} %4, %3 {
+    %5 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel", "parallel"]}
+    ins(%4, %3 : tensor<2x5xf32>, tensor<2x5xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
       %8 = addf %arg0, %arg1 : f32
       linalg.yield %8 : f32
-    }: tensor<2x5xf32>, tensor<2x5xf32> -> tensor<2x5xf32>
+    } -> tensor<2x5xf32>
     %6 = linalg.tensor_reshape %5 [#map3, #map4] : tensor<2x5xf32> into tensor<1x2x5xf32>
     %7 = linalg.tensor_reshape %6 [#map0] : tensor<1x2x5xf32> into tensor<10xf32>
     hal.interface.store.tensor %7, @legacy_io::@ret0, offset = %c0 : tensor<10xf32>
@@ -99,16 +101,18 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x1x1x1000xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<1000xf32>
-    %2 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %1 {
+    %2 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+    ins(%1 : tensor<1000xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       linalg.yield %arg0 : f32
-    }: tensor<1000xf32> -> tensor<1000xf32>
+    } -> tensor<1000xf32>
     %3 = linalg.tensor_reshape %0 [#map1] : tensor<1x1x1x1000xf32> into tensor<1000xf32>
-    %4 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %3, %2 {
+    %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]}
+    ins(%3, %2 : tensor<1000xf32>, tensor<1000xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
       %7 = addf %arg0, %arg1 : f32
       linalg.yield %7 : f32
-    }: tensor<1000xf32>, tensor<1000xf32> -> tensor<1000xf32>
+    } -> tensor<1000xf32>
     %5 = linalg.tensor_reshape %4 [#map1] : tensor<1000xf32> into tensor<1x1x1x1000xf32>
     %6 = linalg.tensor_reshape %5 [#map2, #map3] : tensor<1x1x1x1000xf32> into tensor<1x1000xf32>
     hal.interface.store.tensor %5, @legacy_io::@ret0, offset = %c0 : tensor<1x1x1x1000xf32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/linalg_tensor_to_buffer.mlir b/iree/compiler/Conversion/HLOToLinalg/test/linalg_tensor_to_buffer.mlir
index 9363815..057e909 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/linalg_tensor_to_buffer.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/linalg_tensor_to_buffer.mlir
@@ -9,14 +9,14 @@
       {operand_result_index = 0 : i32} : tensor<2x2xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0
       {operand_result_index = 1 : i32} : tensor<2x2xf32>
-    %2 = linalg.generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    %2 = linalg.generic {
        indexing_maps = [#map0, #map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0, %1 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0, %1 : tensor<2x2xf32>, tensor<2x2xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):       // no predecessors
       %3 = addf %arg3, %arg4 : f32
       linalg.yield %3 : f32
-    }: tensor<2x2xf32>, tensor<2x2xf32> -> tensor<2x2xf32>
+    } -> tensor<2x2xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0
       {operand_result_index = 2 : i32} : tensor<2x2xf32>
     return
@@ -36,7 +36,8 @@
 //   CHECK-DAG: %[[ARG1:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1, operand_result_index = 1 : i32}
 //   CHECK-NOT: hal.interface.load.tensor
 //       CHECK: linalg.generic
-//  CHECK-SAME:   %[[ARG0]], %[[ARG1]], %[[ARG2]]
+//  CHECK-SAME:   ins(%[[ARG0]], %[[ARG1]] :
+//  CHECK-SAME:   outs(%[[ARG2]] :
 //       CHECK:   ^{{[a-zA-Z0-9$._-]+}}
 //  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9$._-]+]]: f32
 //  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9$._-]+]]: f32
@@ -53,17 +54,17 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0
       {operand_result_index = 0 : i32} : tensor<2x2xi32>
-    %1 = linalg.indexed_generic
-      {args_in = 1 : i64, args_out = 1 : i64,
+    %1 = linalg.indexed_generic {
        indexing_maps = [#map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0 : tensor<2x2xi32>) {
     ^bb0(%arg2: index, %arg3: index, %arg4: i32):       // no predecessors
       %2 = index_cast %arg2 : index to i32
       %3 = index_cast %arg3 : index to i32
       %4 = addi %arg4, %2 : i32
       %5 = addi %4, %3 : i32
       linalg.yield %5 : i32
-    }: tensor<2x2xi32> -> tensor<2x2xi32>
+    } -> tensor<2x2xi32>
     hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0
       {operand_result_index = 1 : i32} : tensor<2x2xi32>
     return
@@ -80,7 +81,8 @@
 //  CHECK-DAG: %[[ARG0:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
 //  CHECK-NOT: hal.interface.load.tensor
 //      CHECK: linalg.indexed_generic
-// CHECK-SAME:   %[[ARG0]], %[[RET0]]
+// CHECK-SAME:   ins(%[[ARG0]] :
+// CHECK-SAME:   outs(%[[RET0]] :
 //  CHECK-NOT: hal.interface.store.tensor
 //      CHECK:   ^{{[a-zA-Z0-9$._-]+}}
 // CHECK-SAME:       %[[ARG2:[a-zA-Z0-9$._-]+]]: index
@@ -107,14 +109,14 @@
       {operand_result_index = 1 : i32} : tensor<5xf32>
     %2 = linalg.tensor_reshape %0 [#map0] : tensor<5xf32> into tensor<5x1xf32>
     %3 = linalg.tensor_reshape %1 [#map0] : tensor<5xf32> into tensor<1x5xf32>
-    %4 = linalg.generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    %4 = linalg.generic {
        indexing_maps = [#map1, #map2, #map0],
-       iterator_types = ["parallel", "parallel"]} %2, %3 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%2, %3 : tensor<5x1xf32>, tensor<1x5xf32>) {
          ^bb0(%arg3: f32, %arg4: f32):       // no predecessors
            %5 = addf %arg3, %arg4 : f32
            linalg.yield %5 : f32
-         }: tensor<5x1xf32>, tensor<1x5xf32> -> tensor<5x5xf32>
+         } -> tensor<5x5xf32>
     %6 = linalg.tensor_reshape %4 [#map0] : tensor<5x5xf32> into tensor<25xf32>
     hal.interface.store.tensor %6, @legacy_io::@ret0, offset = %c0
       {operand_result_index = 2 : i32} : tensor<25xf32>
@@ -141,7 +143,8 @@
 //   CHECK-DAG:   %[[RHS:.*]] = linalg.reshape %[[ARG1]] [#[[MAP0]]]
 //       CHECK:   linalg.generic
 //  CHECK-SAME:     indexing_maps = [#[[MAP1]], #[[MAP2]], #[[MAP0]]]
-//  CHECK-SAME:     %[[LHS]], %[[RHS]], %[[RESULT]]
+//  CHECK-SAME:     ins(%[[LHS]], %[[RHS]] :
+//  CHECK-SAME:     outs(%[[RESULT]] :
 
 // -----
 
@@ -179,14 +182,14 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0
       {operand_result_index = 0 : i32} : tensor<2x4xf32>
-    %1 = linalg.generic
-      {args_in = 1 : i64, args_out = 1 : i64,
+    %1 = linalg.generic {
        indexing_maps = [#map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0 : tensor<2x4xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       %2 = tanh %arg0 : f32
       linalg.yield %2 : f32
-    }: tensor<2x4xf32> -> tensor<2x4xf32>
+    } -> tensor<2x4xf32>
     hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0
       {operand_result_index = 1 : i32} : tensor<2x4xf32>
     hal.interface.store.tensor %1, @legacy_io::@ret1, offset = %c0
@@ -207,7 +210,9 @@
 //   CHECK-DAG:   %[[T0:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0, operand_result_index = 1 : i32}
 //       CHECK:   %[[T1:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1, operand_result_index = 2 : i32}
 //       CHECK:   %[[T2:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
-//       CHECK:   linalg.generic {{.*}} %[[T2]], %[[T0]]
+//       CHECK:   linalg.generic
+//  CHECK-SAME:     ins(%[[T2]] :
+//  CHECK-SAME:     outs(%[[T0]] :
 //       CHECK:   linalg.copy(%[[T0]], %[[T1]])
 
 // -----
@@ -221,14 +226,14 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0
       {operand_result_index = 0 : i32} : tensor<2x4xf32>
-    %1 = linalg.generic
-      {args_in = 1 : i64, args_out = 1 : i64,
+    %1 = linalg.generic {
        indexing_maps = [#map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0 : tensor<2x4xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       %2 = tanh %arg0 : f32
       linalg.yield %2 : f32
-    }: tensor<2x4xf32> -> tensor<2x4xf32>
+    } -> tensor<2x4xf32>
     %3 = linalg.tensor_reshape %1 [#map1, #map2]
       : tensor<2x4xf32> into tensor<1x2x4xf32>
     hal.interface.store.tensor %3, @legacy_io::@ret1, offset = %c0
@@ -252,7 +257,9 @@
 //       CHECK:   %[[T1:.*]] = linalg.reshape %[[T0]]
 //       CHECK:   %[[T2:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0, operand_result_index = 1 : i32}
 //       CHECK:   %[[T3:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
-//       CHECK:   linalg.generic {{.*}} %[[T3]], %[[T1]]
+//       CHECK:   linalg.generic
+//  CHECK-SAME:     ins(%[[T3]] :
+//  CHECK-SAME:     outs(%[[T1]] :
 //       CHECK:   linalg.copy(%[[T1]], %[[T2]])
 //       CHECK:   return
 
@@ -267,14 +274,14 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0
       {operand_result_index = 0 : i32} : tensor<2x4xf32>
-    %1 = linalg.generic
-      {args_in = 1 : i64, args_out = 1 : i64,
+    %1 = linalg.generic {
        indexing_maps = [#map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0 : tensor<2x4xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       %2 = tanh %arg0 : f32
       linalg.yield %2 : f32
-    }: tensor<2x4xf32> -> tensor<2x4xf32>
+    } -> tensor<2x4xf32>
     %3 = linalg.tensor_reshape %1 [#map1, #map2]
       : tensor<2x4xf32> into tensor<1x2x4xf32>
     hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0
@@ -297,7 +304,9 @@
 //   CHECK-DAG:   %[[T0:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0, operand_result_index = 1 : i32}
 //   CHECK-DAG:   %[[T1:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1, operand_result_index = 2 : i32}
 //   CHECK-DAG:   %[[T2:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
-//       CHECK:   linalg.generic {{.*}} %[[T2]], %[[T0]]
+//       CHECK:   linalg.generic
+//  CHECK-SAME:     ins(%[[T2]] :
+//  CHECK-SAME:     outs(%[[T0]] :
 //       CHECK:   %[[T3:.*]] = linalg.reshape %[[T0]]
 //       CHECK:   linalg.copy(%[[T3]], %[[T1]])
 //       CHECK:   return
@@ -313,14 +322,14 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0
       {operand_result_index = 0 : i32} : tensor<2x4xf32>
-    %1 = linalg.generic
-      {args_in = 1 : i64, args_out = 1 : i64,
+    %1 = linalg.generic {
        indexing_maps = [#map0, #map0],
-       iterator_types = ["parallel", "parallel"]} %0 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%0 : tensor<2x4xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       %2 = tanh %arg0 : f32
       linalg.yield %2 : f32
-    }: tensor<2x4xf32> -> tensor<2x4xf32>
+    } -> tensor<2x4xf32>
     %3 = linalg.tensor_reshape %1 [#map1, #map2]
       : tensor<2x4xf32> into tensor<1x2x4xf32>
     %4 = linalg.tensor_reshape %1 [#map1, #map2]
@@ -353,7 +362,9 @@
 //   CHECK-DAG:   %[[T2:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1, operand_result_index = 2 : i32}
 //   CHECK-DAG:   %[[T3:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret2, operand_result_index = 3 : i32}
 //   CHECK-DAG:   %[[T4:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
-//       CHECK:   linalg.generic {{.*}} %[[T4]], %[[T1]]
+//       CHECK:   linalg.generic
+//  CHECK-SAME:   ins(%[[T4]] :
+//  CHECK-SAME:   outs(%[[T1]] :
 //       CHECK:   linalg.copy(%[[T0]], %[[T2]])
 //       CHECK:   linalg.copy(%[[T0]], %[[T3]])
 //       CHECK:   return
@@ -373,14 +384,14 @@
       : tensor<1x128x128x1xf32> into tensor<128x128xf32>
     %2 = linalg.tensor_reshape %0 [#map0, #map1]
       : tensor<1x128x128x1xf32> into tensor<128x128xf32>
-    %3 = linalg.generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    %3 = linalg.generic {
        indexing_maps = [#map2, #map2, #map2],
-       iterator_types = ["parallel", "parallel"]} %1, %2 {
+       iterator_types = ["parallel", "parallel"]}
+    ins(%1, %2 : tensor<128x128xf32>, tensor<128x128xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
       %5 = mulf %arg0, %arg1 : f32
       linalg.yield %5 : f32
-    }: tensor<128x128xf32>, tensor<128x128xf32> -> tensor<128x128xf32>
+    } -> tensor<128x128xf32>
     %4 = linalg.tensor_reshape %3 [#map0, #map1]
       : tensor<128x128xf32> into tensor<1x128x128x1xf32>
     hal.interface.store.tensor %4, @legacy_io::@ret0, offset = %c0
@@ -400,7 +411,9 @@
 //       CHECK:   %[[T2:.*]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0, operand_result_index = 0 : i32}
 //       CHECK:   %[[T3:.*]] = linalg.reshape %[[T2]]
 //       CHECK:   %[[T4:.*]] = linalg.reshape %[[T2]]
-//       CHECK:   linalg.generic {{.*}} %[[T3]], %[[T4]], %[[T1]]
+//       CHECK:   linalg.generic
+//  CHECK-SAME: ins(%[[T3]], %[[T4]] :
+//  CHECK-SAME: outs(%[[T1]] :
 //       CHECK:   return
 
 // -----
@@ -417,13 +430,13 @@
       {operand_result_index = 0 : i32} : tensor<1x1x1x1000xf32>
     %1 = linalg.tensor_reshape %0 [#map0]
       : tensor<1x1x1x1000xf32> into tensor<1000xf32>
-    %2 = linalg.generic
-      {args_in = 1 : i64, args_out = 1 : i64,
-       indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} %1 {
+    %2 = linalg.generic {
+       indexing_maps = [#map1, #map1], iterator_types = ["parallel"]}
+    ins(%1 : tensor<1000xf32>) {
     ^bb0(%arg0: f32):  // no predecessors
       %5 = addf %arg0, %cst : f32
       linalg.yield %5 : f32
-    }: tensor<1000xf32> -> tensor<1000xf32>
+    } -> tensor<1000xf32>
     %3 = linalg.tensor_reshape %2 [#map0]
       : tensor<1000xf32> into tensor<1x1x1x1000xf32>
     %4 = linalg.tensor_reshape %3 [#map2, #map3]
@@ -454,7 +467,8 @@
 //       CHECK:   %[[ARG0_RESHAPE:.+]] = linalg.reshape %[[ARG0]]
 //  CHECK-SAME:     memref<1x1x1x1000xf32> into memref<1000xf32>
 //       CHECK:   linalg.generic
-//  CHECK-SAME:     %[[ARG0_RESHAPE]], %[[RET0_RESHAPE]]
+//  CHECK-SAME:     ins(%[[ARG0_RESHAPE]] :
+//  CHECK-SAME:     outs(%[[RET0_RESHAPE]] :
 //       CHECK:   %[[RET0_RESHAPE2:.+]] = linalg.reshape %[[RET0]]
 //  CHECK-SAME:     memref<1x1x1x1000xf32> into memref<1x1000xf32>
 //       CHECK:   linalg.copy(%[[RET0_RESHAPE2]], %[[RET1]])
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/pipeline_test.mlir b/iree/compiler/Conversion/HLOToLinalg/test/pipeline_test.mlir
index 90c4749..d3d12eb 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/pipeline_test.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/pipeline_test.mlir
@@ -51,5 +51,6 @@
 //   CHECK-DAG:   %[[ARG1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1}
 //   CHECK-DAG:   %[[ARG0:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0}
 //       CHECK:   linalg.generic
-//  CHECK-SAME:     %[[ARG0]], %[[ARG1]], %[[RET0]]
+//  CHECK-SAME:     ins(%[[ARG0]], %[[ARG1]] :
+//  CHECK-SAME:     outs(%[[RET0]] :
 //       CHECK:   linalg.copy(%[[RET0]], %[[RET1]])
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir b/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir
index f639092..282eee6 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir
@@ -8,16 +8,17 @@
   //  CHECK-DAG: %[[ARG2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<5xf32>
   //  CHECK-DAG: %[[ARG0:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x4xf32>
   //  CHECK-DAG: %[[ARG1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<f32>
-  //      CHECK: linalg.indexed_generic {args_in = 2 : i64, args_out = 1 : i64,
+  //      CHECK: linalg.indexed_generic
   // CHECK-SAME: indexing_maps
   // CHECK-SAME: #[[MAP0]], #[[MAP1]], #[[MAP2]]
   // CHECK-SAME: iterator_types = ["parallel", "reduction"]}
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] {
+  // CHECK-SAME:   ins(%[[ARG0]], %[[ARG1]] : memref<5x4xf32>, memref<f32>
+  // CHECK-SAME:   outs(%[[ARG2]] : memref<5xf32>
   // CHECK-NEXT: ^{{.+}}(%{{.+}}, %[[IDX:.+]]: index, %[[SRC:.+]]: f32, %[[INIT:.+]]: f32, %[[DST:.+]]: f32):
   //      CHECK:   %[[OPERAND:.+]] = select %{{.+}}, %[[INIT]], %[[DST]] : f32
   // CHECK-NEXT:   %[[RES:.+]] = addf %[[SRC]], %[[OPERAND]] : f32
   // CHECK-NEXT:   linalg.yield %[[RES]] : f32
-  // CHECK-NEXT: }: memref<5x4xf32>, memref<f32>, memref<5xf32>
+  // CHECK-NEXT: }
   func @reduction_entry() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
@@ -119,16 +120,17 @@
   //      CHECK: %[[ARG2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<4xf32>
   //      CHECK: %[[ARG0:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x4xf32>
   //      CHECK: %[[ARG1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<f32>
-  //      CHECK: linalg.indexed_generic {args_in = 2 : i64, args_out = 1 : i64,
+  //      CHECK: linalg.indexed_generic {
   // CHECK-SAME: indexing_maps
   // CHECK-SAME: #[[MAP0]], #[[MAP1]], #[[MAP2]]
   // CHECK-SAME: iterator_types = ["parallel", "reduction"]}
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] {
+  // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref<5x4xf32>, memref<f32>)
+  // CHECK-SAME: outs(%[[ARG2]] : memref<4xf32>)
   // CHECK-NEXT: ^{{.+}}(%{{.+}}, %[[IDX:.+]]: index, %[[SRC:.+]]: f32, %[[INIT:.+]]: f32, %[[DST:.+]]: f32):
   //      CHECK:   %[[OPERAND:.+]] = select %{{.+}}, %[[INIT]], %[[DST]] : f32
   // CHECK-NEXT:   %[[RES:.+]] = addf %[[SRC]], %[[OPERAND]] : f32
   // CHECK-NEXT:   linalg.yield %[[RES]] : f32
-  // CHECK-NEXT: }: memref<5x4xf32>, memref<f32>, memref<4xf32>
+  // CHECK-NEXT: }
   func @reduction_entry() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
@@ -157,8 +159,6 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x10xf32>
     // CHECK: %[[CST:.+]] = constant 0xFF800000 : f32
     // CHECK: linalg.indexed_generic
-    // CHECK-SAME: args_in = 1
-    // CHECK-SAME: args_out = 1
     // CHECK: ^{{.+}}(%{{.+}}: index, %[[DIM:.+]]: index, %{{.+}}: f32, %[[OUTPUT:.+]]: f32):
     // CHECK: select %{{.+}}, %[[CST]], %[[OUTPUT]] : f32
     %cst = constant dense<0xFF800000> : tensor<f32>
@@ -186,11 +186,12 @@
   //      CHECK: %[[ARG2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<4xf32>
   //      CHECK: %[[ARG0:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<5x4x3xf32>
   //      CHECK: %[[ARG1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<f32>
-  //      CHECK: linalg.indexed_generic {args_in = 2 : i64, args_out = 1 : i64,
+  //      CHECK: linalg.indexed_generic {
   // CHECK-SAME: indexing_maps
   // CHECK-SAME: #[[MAP0]], #[[MAP1]], #[[MAP2]]
   // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]}
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] {
+  // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref<5x4x3xf32>, memref<f32>)
+  // CHECK-SAME: outs(%[[ARG2]] : memref<4xf32>)
   // CHECK-NEXT: ^{{.+}}(%{{.+}}, %[[IDX:.+]]: index, %[[SRC:.+]]: f32, %[[INIT:.+]]: f32, %[[DST:.+]]: f32):
   //      CHECK:   %[[TRUE:.+]] = constant true
   //      CHECK:   %[[CMP1:.+]] = cmpi
@@ -200,7 +201,7 @@
   // CHECK-NEXT:   %[[OPERAND:.+]] = select %[[COND2]], %[[INIT]], %[[DST]] : f32
   // CHECK-NEXT:   %[[RES:.+]] = addf %[[SRC]], %[[OPERAND]] : f32
   // CHECK-NEXT:   linalg.yield %[[RES]] : f32
-  // CHECK-NEXT: }: memref<5x4x3xf32>, memref<f32>, memref<4xf32>
+  // CHECK-NEXT: }
   func @reduction_multi_dimensions() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4x3xf32>
diff --git a/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir b/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir
index 1cf33d1..d299854 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir
@@ -8,17 +8,15 @@
   //  CHECK-DAG: %[[INDEX:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<2xi32>
   //  CHECK-DAG: %[[OUTPUT:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<2x1x5xi32>
   //      CHECK: linalg.indexed_generic {
-  // CHECK-SAME:   args_in = 1
-  // CHECK-SAME:   args_out = 1
   // CHECK-SAME:   indexing_maps
   // CHECK-SAME:   #[[MAP0]], #[[MAP1]]
   // CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel"]
-  // CHECK-SAME: } %[[INDEX]], %[[OUTPUT]] {
+  // CHECK-SAME: ins(%[[INDEX]] : memref<2xi32>)
+  // CHECK-SAME: outs(%[[OUTPUT]] : memref<2x1x5xi32>)
   // CHECK-NEXT: ^{{.+}}(%[[I:.+]]: index, %[[J:.+]]: index, %[[K:.+]]: index, %[[VAL:.+]]: i32, %{{.+}}: i32):
   //      CHECK:   %[[CAST:.+]] = index_cast %[[VAL]] : i32 to index
   //      CHECK:   %[[VAL2:.+]] = load %[[INPUT]][%[[CAST]], %[[J]], %[[K]]] : memref<5x1x5xi32>
   //      CHECK:   linalg.yield %[[VAL2]] : i32
-  //      CHECK: }: memref<2xi32>, memref<2x1x5xi32>
   func @torch_select_index() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x5xi32>
@@ -46,17 +44,15 @@
   //  CHECK-DAG: %[[INDEX:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<i32>
   //  CHECK-DAG: %[[OUTPUT:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<8xf32>
   //      CHECK: linalg.indexed_generic {
-  // CHECK-SAME:   args_in = 1
-  // CHECK-SAME:   args_out = 1
   // CHECK-SAME:   indexing_maps
   // CHECK-SAME:   #[[MAP0]], #[[MAP1]]
   // CHECK-SAME:   iterator_types = ["parallel"]
-  // CHECK-SAME: } %[[INDEX]], %[[OUTPUT]] {
+  // CHECK-SAME: ins(%[[INDEX]] : memref<i32>)
+  // CHECK-SAME: outs(%[[OUTPUT]] : memref<8xf32>)
   // CHECK-NEXT: ^{{.+}}(%[[I:.+]]: index, %[[VAL:.+]]: i32, %{{.+}}: f32):
   //      CHECK:   %[[CAST:.+]] = index_cast %[[VAL]] : i32 to index
   //      CHECK:   %[[VAL2:.+]] = load %[[INPUT]][%[[CAST]], %[[I]]] : memref<4x8xf32>
   //      CHECK:   linalg.yield %[[VAL2]] : f32
-  //      CHECK: }: memref<i32>, memref<8xf32>
   func @torch_select_index_scalar() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<4x8xf32>
@@ -84,17 +80,15 @@
   //  CHECK-DAG: %[[INDEX:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<4x1xi32>
   //  CHECK-DAG: %[[OUTPUT:[a-zA-Z0-9$._-]+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<4x7x1x2xf32>
   //      CHECK: linalg.indexed_generic {
-  // CHECK-SAME:   args_in = 1
-  // CHECK-SAME:   args_out = 1
   // CHECK-SAME:   indexing_maps
   // CHECK-SAME:   #[[MAP0]], #[[MAP1]]
   // CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  // CHECK-SAME: } %[[INDEX]], %[[OUTPUT]] {
+  // CHECK-SAME: ins(%[[INDEX]] : memref<4x1xi32>)
+  // CHECK-SAME: outs(%[[OUTPUT]] : memref<4x7x1x2xf32>)
   // CHECK-NEXT: ^{{.+}}(%[[I:.+]]: index, %[[J:.+]]: index, %[[K:.+]]: index, %[[L:.+]]: index, %[[VAL:.+]]: i32, %{{.+}}: f32):
   //      CHECK:   %[[CAST:.+]] = index_cast %[[VAL]] : i32 to index
   //      CHECK:   %[[VAL2:.+]] = load %[[INPUT]][%[[I]], %[[J]], %[[CAST]], %[[L]]] : memref<4x7x8x2xf32>
   //      CHECK:   linalg.yield %[[VAL2]] : f32
-  //      CHECK: }: memref<4x1xi32>, memref<4x7x1x2xf32>
   func @torch_select_index_batch() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<4x7x8x2xf32>
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
index 2437491..f20d91f 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/convert_to_gpu.mlir
@@ -14,15 +14,15 @@
       {binding = @legacy_io::@arg1, operand_result_index = 9 : i32} : memref<?x?x?x?xf32>
     %arg2 = iree.placeholder for "interace buffer"
       {binding = @legacy_io::@ret0, operand_result_index = 10 : i32} : memref<?x?x?x?xf32>
-    linalg.generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    linalg.generic {
        indexing_maps = [#map0, #map0, #map0],
        iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-      %arg0, %arg1, %arg2 {
+      ins(%arg0, %arg1 : memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
+     outs(%arg2 : memref<?x?x?x?xf32>) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %0 = addf %arg3, %arg4 : f32
       linalg.yield %0 : f32
-    } : memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
+    }
     return
   }
   func @parallel_4D__num_workgroups__
@@ -81,15 +81,15 @@
       {binding = @legacy_io::@arg1, operand_result_index = 1 : i32} : memref<3x4x5x6xf32>
     %arg2 = iree.placeholder for "interace buffer"
       {binding = @legacy_io::@ret0, operand_result_index = 2 : i32} : memref<3x4x5x6xf32>
-    linalg.generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    linalg.generic {
        indexing_maps = [#map0, #map0, #map0],
        iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-      %arg0, %arg1, %arg2 {
+      ins(%arg0, %arg1 : memref<3x4x5x6xf32>, memref<3x4x5x6xf32>)
+     outs(%arg2 : memref<3x4x5x6xf32>) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %0 = addf %arg3, %arg4 : f32
       linalg.yield %0 : f32
-    } : memref<3x4x5x6xf32>, memref<3x4x5x6xf32>, memref<3x4x5x6xf32>
+    }
     return
   }
   func @parallel_4D_static__num_workgroups__
@@ -135,8 +135,6 @@
 #map0 = affine_map<() -> ()>
 #accesses = [#map0, #map0, #map0]
 #trait = {
-  args_in = 2 : i64,
-  args_out = 1 : i64,
   indexing_maps = #accesses,
   iterator_types = []
 }
@@ -154,11 +152,13 @@
       {binding = @legacy_io::@arg1, operand_result_index = 1 : i32} : memref<f32>
     %arg2 = iree.placeholder for "interace buffer"
       {binding = @legacy_io::@ret0, operand_result_index = 2 : i32} : memref<f32>
-    linalg.generic #trait %arg0, %arg1, %arg2 {
+    linalg.generic #trait
+      ins(%arg0, %arg1 : memref<f32>, memref<f32>)
+     outs(%arg2 : memref<f32>) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %0 = addf %arg3, %arg4 : f32
       linalg.yield %0 : f32
-     } : memref<f32>, memref<f32>, memref<f32>
+     }
      return
   }
   func @scalar_add__num_workgroups__
@@ -193,11 +193,12 @@
       {binding = @legacy_io::@arg1, operand_result_index = 1 : i32} : memref<f32>
     %arg2 = iree.placeholder for "interace buffer"
       {binding = @legacy_io::@ret0, operand_result_index = 2 : i32} : memref<?xf32>
-    linalg.indexed_generic
-      {args_in = 2 : i64, args_out = 1 : i64,
+    linalg.indexed_generic {
        indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> ()>,
                         affine_map<(d0, d1, d2) -> (d0)>],
-       iterator_types = ["parallel", "parallel", "reduction"]} %arg0, %arg1, %arg2 {
+       iterator_types = ["parallel", "parallel", "reduction"]}
+      ins(%arg0, %arg1 : memref<?x?x?xf32>, memref<f32>)
+     outs(%arg2 : memref<?xf32>) {
     ^bb0(%arg3: index, %arg4: index, %arg5: index,
          %arg6: f32, %arg7: f32, %arg8: f32):   // no predecessors
       %c0 = constant 0 : index
@@ -207,7 +208,7 @@
       %2 = select %1, %arg7, %arg8 : f32
       %3 = addf %arg6, %2 : f32
       linalg.yield %3 : f32
-    }: memref<?x?x?xf32>, memref<f32>, memref<?xf32>
+    }
     return
   }
   hal.interface @legacy_io attributes {sym_visibility = "private"} {
@@ -242,7 +243,6 @@
 #map5 = affine_map<(d0, d1, d2) -> (d2, d1)>
 #map6 = affine_map<(d0, d1, d2) -> (d0, d1)>
 
-
 module attributes {
   spv.target_env =
     #spv.target_env<#spv.vce<v1.3, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
diff --git a/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir b/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir
index c115b4a..5d3ff91 100644
--- a/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir
+++ b/iree/compiler/Conversion/LinalgToSPIRV/test/split_dispatch_function.mlir
@@ -222,13 +222,14 @@
     %0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<2x4xf32>
     %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret1} : memref<1x2x4xf32>
     %2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<2x4xf32>
-    linalg.generic {args_in = 1 : i64, args_out = 1 : i64,
-                    indexing_maps = [#map0, #map0],
-                    iterator_types = ["parallel", "parallel"]} %2, %0 {
+    linalg.generic {indexing_maps = [#map0, #map0],
+                    iterator_types = ["parallel", "parallel"]}
+      ins(%2 : memref<2x4xf32>)
+     outs(%0 : memref<2x4xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
       %4 = tanh %arg0 : f32
       linalg.yield %4 : f32
-    }: memref<2x4xf32>, memref<2x4xf32>
+    }
     %3 = linalg.reshape %0 [#map1, #map2] : memref<2x4xf32> into memref<1x2x4xf32>
     linalg.copy(%3, %1) : memref<1x2x4xf32>, memref<1x2x4xf32>
     return
@@ -260,7 +261,9 @@
 //      CHECK: func @reshape_interleaved_dispatch_0()
 //      CHECK:   %[[OUT:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<2x4xf32>
 //      CHECK:   %[[IN:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<2x4xf32>
-//      CHECK:   linalg.generic {{.*}} %[[IN]], %[[OUT]]
+//      CHECK:   linalg.generic
+// CHECK-SAME:     ins(%[[IN]] :
+// CHECK-SAME:    outs(%[[OUT]] :
 
 // -----
 
@@ -272,13 +275,14 @@
     %2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<1x512x1xf32>
     linalg.copy(%2, %0) : memref<1x512x1xf32>, memref<1x512x1xf32>
     %3 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<4x8x16xf32>
-    linalg.generic {args_in = 1 : i64, args_out = 1 : i64,
-                    indexing_maps = [affine_map<(d0, d1, d2) -> (-d0 + 3, d1, d2)>,
+    linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (-d0 + 3, d1, d2)>,
                                      affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
-                    iterator_types = ["parallel", "parallel", "parallel"]} %3, %1 {
+                    iterator_types = ["parallel", "parallel", "parallel"]}
+      ins(%3 : memref<4x8x16xf32>)
+     outs(%1 : memref<4x8x16xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
       linalg.yield %arg0 : f32
-    }: memref<4x8x16xf32>, memref<4x8x16xf32>
+    }
     return
   }
   func @predict_ex_dispatch_0__num_workgroups__(!shapex.ranked_shape<[1,512,1]>,
diff --git a/iree/compiler/Conversion/LinalgToVector/LoadStoreVectorization.cpp b/iree/compiler/Conversion/LinalgToVector/LoadStoreVectorization.cpp
index a938cd3..3a6203d 100644
--- a/iree/compiler/Conversion/LinalgToVector/LoadStoreVectorization.cpp
+++ b/iree/compiler/Conversion/LinalgToVector/LoadStoreVectorization.cpp
@@ -197,12 +197,13 @@
       rewriter.replaceOp(placeholder, arg.getResult());
       newArgs.push_back(arg.getResult());
     }
-
+    ArrayRef<Value> newArgsRef(newArgs.begin(), newArgs.end());
     auto newOp = rewriter.create<linalg::GenericOp>(
-        genericOp.getLoc(), genericOp.getResultTypes(), newArgs,
-        rewriter.getI64IntegerAttr(genericOp.getNumInputs()),
-        rewriter.getI64IntegerAttr(genericOp.getNumOutputs()),
-        genericOp.indexing_mapsAttr(), genericOp.iterator_types(),
+        genericOp.getLoc(), genericOp.getResultTypes(),
+        /*inputs=*/newArgsRef.take_front(genericOp.getNumInputs()),
+        /*outputBuffers=*/newArgsRef.take_back(genericOp.getNumOutputs()),
+        /*initTensors=*/ValueRange{}, genericOp.indexing_mapsAttr(),
+        genericOp.iterator_types(),
         /*doc=*/nullptr,
         /*library_call=*/nullptr,
         /*symbol_source=*/nullptr);
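
Note: the pass keeps collecting operands into one flat vector (inputs first,
then outputs), so the new builder call splits it with `take_front`/`take_back`.
A self-contained illustration of that split, with ints standing in for
mlir::Value and hypothetical counts:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"

    void splitExample() {
      // Flat operand list as the pass builds it: 2 inputs, then 1 output.
      llvm::SmallVector<int, 4> newArgs = {1, 2, 3};
      llvm::ArrayRef<int> ref(newArgs.begin(), newArgs.end());
      auto ins = ref.take_front(2);  // maps to ins(...): {1, 2}
      auto outs = ref.take_back(1);  // maps to outs(...): {3}
      (void)ins;
      (void)outs;
    }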
diff --git a/iree/compiler/Conversion/LinalgToVector/test/vectorize_linalg_ops.mlir b/iree/compiler/Conversion/LinalgToVector/test/vectorize_linalg_ops.mlir
index c115d51..c7a597c 100644
--- a/iree/compiler/Conversion/LinalgToVector/test/vectorize_linalg_ops.mlir
+++ b/iree/compiler/Conversion/LinalgToVector/test/vectorize_linalg_ops.mlir
@@ -4,17 +4,16 @@
   %0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<4xf32>
   %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<3x4xf32>
   %2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<3x4xf32>
-  linalg.generic {args_in = 2 : i64,
-                  args_out = 1 : i64,
-                  indexing_maps = [affine_map<(d0, d1) -> (d1)>,
+  linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>,
                                    affine_map<(d0, d1) -> (d0, d1)>,
                                    affine_map<(d0, d1) -> (d0, d1)>],
-                  iterator_types = ["parallel", "parallel"]
-  } %0, %1, %2 {
+                  iterator_types = ["parallel", "parallel"]}
+    ins(%0, %1 : memref<4xf32>, memref<3x4xf32>)
+   outs(%2 : memref<3x4xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):  // no predecessors
     %3 = addf %arg0, %arg1 : f32
     linalg.yield %3 : f32
-  }: memref<4xf32>, memref<3x4xf32>, memref<3x4xf32>
+  }
   return
 }
 // CHECK-LABEL: func @broadcast_add
@@ -22,7 +21,8 @@
 //   CHECK-DAG: %[[BUF1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<3x1xvector<4xf32>>
 //   CHECK-DAG: %[[BUF2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<3x1xvector<4xf32>>
 //       CHECK: linalg.generic
-//  CHECK-SAME:   %[[BUF0]], %[[BUF1]], %[[BUF2]]
+//  CHECK-SAME:   ins(%[[BUF0]], %[[BUF1]] :
+//  CHECK-SAME:   outs(%[[BUF2]] :
 //       CHECK: ^bb0(%[[ARG0:.+]]: vector<4xf32>, %[[ARG1:.+]]: vector<4xf32>, %[[ARG2:.+]]: vector<4xf32>)
 //       CHECK:   %[[RES:.+]] = addf %[[ARG0]], %[[ARG1]] : vector<4xf32>
 //       CHECK:   linalg.yield %[[RES]] : vector<4xf32>
@@ -34,12 +34,14 @@
   %c0 = constant 0 : index
   %cst = constant 1.000000e+00 : f32
   %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<4xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %1, %0 {
+  linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]}
+    ins(%1 : memref<4xf32>)
+   outs(%0 : memref<4xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
     %2 = addf %arg0, %cst : f32
     %3 = log %2 : f32
     linalg.yield %3 : f32
-  }: memref<4xf32>, memref<4xf32>
+  }
   return
 }
 // CHECK-LABEL: func @log_plus_one
@@ -47,7 +49,8 @@
 //   CHECK-DAG: %[[BUF1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1xvector<4xf32>>
 //   CHECK-DAG: %[[CST:.+]] = constant dense<1.000000e+00> : vector<4xf32>
 //       CHECK: linalg.generic
-//  CHECK-SAME:   %[[BUF0]], %[[BUF1]]
+//  CHECK-SAME:   ins(%[[BUF0]] :
+//  CHECK-SAME:   outs(%[[BUF1]] :
 //       CHECK: ^bb0(%[[ARG0:.+]]: vector<4xf32>, %[[ARG1:.+]]: vector<4xf32>)
 //       CHECK:   %[[T1:.+]] = addf %[[ARG0]], %[[CST]] : vector<4xf32>
 //       CHECK:   %[[T2:.+]] = log %[[T1]] : vector<4xf32>
@@ -59,12 +62,14 @@
   %0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<4xi32>
   %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<4xi32>
   %2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<4xi32>
-  linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} %1, %2, %0 {
+  linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]}
+    ins(%1, %2 : memref<4xi32>, memref<4xi32>)
+   outs(%0 : memref<4xi32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):  // no predecessors
     %3 = cmpi "sgt", %arg0, %arg1 : i32
     %4 = select %3, %arg0, %arg1 : i32
     linalg.yield %4 : i32
-  }: memref<4xi32>, memref<4xi32>, memref<4xi32>
+  }
   return
 }
 // CHECK-LABEL: func @cmp_and_select
@@ -72,7 +77,8 @@
 //   CHECK-DAG: %[[BUF1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<1xvector<4xi32>>
 //   CHECK-DAG: %[[BUF2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1xvector<4xi32>>
 //       CHECK: linalg.generic
-//  CHECK-SAME:   %[[BUF0]], %[[BUF1]], %[[BUF2]]
+//  CHECK-SAME:   ins(%[[BUF0]], %[[BUF1]] :
+//  CHECK-SAME:   outs(%[[BUF2]] :
 //       CHECK: ^bb0(%[[ARG0:.+]]: vector<4xi32>, %[[ARG1:.+]]: vector<4xi32>, %[[ARG2:.+]]: vector<4xi32>)
 //       CHECK:   %[[T1:.+]] = cmpi "sgt", %[[ARG0]], %[[ARG1]] : vector<4xi32>
 //       CHECK:   %[[T2:.+]] = select %[[T1]], %[[ARG0]], %[[ARG1]] : vector<4xi1>, vector<4xi32>
@@ -85,18 +91,18 @@
   %2 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<4xi32>
   %0 = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<4xi32>
   %cst = constant 0.000000e+00 : f32
-  linalg.generic {args_in = 2 : i64,
-                  args_out = 1 : i64,
-                  indexing_maps = [affine_map<(d0) -> (d0)>,
+  linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                    affine_map<(d0) -> (d0)>,
                                    affine_map<(d0) -> (d0)>],
-                  iterator_types = ["parallel"]} %1, %2, %0 {
+                  iterator_types = ["parallel"]}
+    ins(%1, %2 : memref<4xf32>, memref<4xi32>)
+   outs(%0 : memref<4xi32>) {
   ^bb0(%arg0: f32, %arg1: i32, %arg2: i32):  // no predecessors
     %3 = cmpf "oeq", %arg0, %cst : f32
     %4 = zexti %3 : i1 to i32
     %5 = muli %4, %arg1 : i32
     linalg.yield %5 : i32
-  }: memref<4xf32>, memref<4xi32>, memref<4xi32>
+  }
   return
 }
 // CHECK-LABEL: func @cmp_convert_mul
@@ -104,7 +110,8 @@
 //   CHECK-DAG: %[[BUF1:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg1} : memref<1xvector<4xi32>>
 //   CHECK-DAG: %[[BUF2:.+]] = iree.placeholder for "interface buffer" {binding = @legacy_io::@ret0} : memref<1xvector<4xi32>>
 //       CHECK: linalg.generic
-//  CHECK-SAME:   %[[BUF0]], %[[BUF1]], %[[BUF2]]
+//  CHECK-SAME:   ins(%[[BUF0]], %[[BUF1]] :
+//  CHECK-SAME:  outs(%[[BUF2]] :
 //       CHECK: ^bb0(%[[ARG0:.+]]: vector<4xf32>, %[[ARG1:.+]]: vector<4xi32>, %[[ARG2:.+]]: vector<4xi32>)
 //       CHECK:   %[[T1:.+]] = cmpf "oeq", %[[ARG0]], %{{.+}} : vector<4xf32>
 //       CHECK:   %[[T2:.+]] = zexti %[[T1]] : vector<4xi1> to vector<4xi32>
@@ -118,11 +125,13 @@
   %c0 = constant 0 : index
   %cst = constant 1.000000e+00 : f32
   %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<4x4xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} %1, %0 {
+  linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]}
+    ins(%1 : memref<4x4xf32>)
+   outs(%0 : memref<4x4xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
     %2 = addf %arg0, %cst : f32
     linalg.yield %2 : f32
-  }: memref<4x4xf32>, memref<4x4xf32>
+  }
   return
 }
 // CHECK-LABEL: func @not_contiguous
@@ -136,11 +145,13 @@
   %c0 = constant 0 : index
   %cst = constant 1.000000e+00 : f32
   %1 = iree.placeholder for "interface buffer" {binding = @legacy_io::@arg0} : memref<4x3xf32>
-  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} %1, %0 {
+  linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]}
+    ins(%1 : memref<4x3xf32>)
+   outs(%0 : memref<4x3xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):  // no predecessors
     %2 = addf %arg0, %cst : f32
     linalg.yield %2 : f32
-  }: memref<4x3xf32>, memref<4x3xf32>
+  }
   return
 }
 // CHECK-LABEL: func @not_4s
diff --git a/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp b/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp
index 708bb9b..587b9b0 100644
--- a/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp
+++ b/iree/compiler/Dialect/Vulkan/Utils/TargetEnvUtils.cpp
@@ -100,9 +100,6 @@
   MLIRContext *context = vkTargetEnv.getContext();
   auto vkCapabilities = vkTargetEnv.getCapabilitiesAttr();
   return spirv::ResourceLimitsAttr::get(
-      /*vendor_id=*/nullptr,
-      /*device_id=*/nullptr,
-      /*device_type=*/nullptr,
       /*max_compute_shared_memory_size=*/nullptr,
       vkCapabilities.maxComputeWorkGroupInvocations(),
       vkCapabilities.maxComputeWorkGroupSize(),
@@ -130,7 +127,9 @@
 
   auto triple = spirv::VerCapExtAttr::get(
       spvVersion, spvCapabilities, spvExtensions, vkTargetEnv.getContext());
-  return spirv::TargetEnvAttr::get(triple, spvLimits);
+  return spirv::TargetEnvAttr::get(
+      triple, spirv::Vendor::Unknown, spirv::DeviceType::Unknown,
+      spirv::TargetEnvAttr::kUnknownDeviceID, spvLimits);
 }
 
 }  // namespace Vulkan
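
Note: both SPIR-V target-environment call sites follow the upstream move of
vendor/device information off `spirv::ResourceLimitsAttr` (the dropped
`vendor_id`/`device_id`/`device_type` fields) and onto `spirv::TargetEnvAttr`
itself. A hedged sketch of the new call shape, assuming an
`MLIRContext *context` is in scope and default resource limits suffice:

    auto triple = spirv::VerCapExtAttr::get(
        spirv::Version::V_1_3, {spirv::Capability::Shader},
        {spirv::Extension::SPV_KHR_storage_buffer_storage_class}, context);
    auto targetEnv = spirv::TargetEnvAttr::get(
        triple, spirv::Vendor::Unknown, spirv::DeviceType::Unknown,
        spirv::TargetEnvAttr::kUnknownDeviceID,
        spirv::getDefaultResourceLimits(context));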