Merge pull request #7265 from MaheshRavishankar:main-to-google
PiperOrigin-RevId: 401101859
diff --git a/SUBMODULE_VERSIONS.txt b/SUBMODULE_VERSIONS.txt
index 89a3db9..2d928d9 100644
--- a/SUBMODULE_VERSIONS.txt
+++ b/SUBMODULE_VERSIONS.txt
@@ -4,7 +4,7 @@
aa533abfd4232b01f9e57041d70114d5a77e6de0 third_party/googletest
88b845dee001723c4a0db1fe5477de735b6d3bb0 third_party/liburing
acd6f6f014c25e46363e718381e0b35205df2d83 third_party/libyaml
-5f7a5353301b776ffb0e5fb048992898507bf7ee third_party/llvm-project
+471b25e217e635e058bbdbca8c693e2998380a60 third_party/llvm-project
777b5c11c09fbc2e19974054351b94b3aa7ae6d0 third_party/mlir-hlo
3f701faace7addc75d16dea8a6cd769fa5b3f260 third_party/musl
4c7697dbe973ed01ae6fbec37d186ebd05982e1f third_party/pybind11
diff --git a/build_tools/benchmarks/run_benchmarks_on_android.py b/build_tools/benchmarks/run_benchmarks_on_android.py
index 0271a72..1c13a9c 100755
--- a/build_tools/benchmarks/run_benchmarks_on_android.py
+++ b/build_tools/benchmarks/run_benchmarks_on_android.py
@@ -249,10 +249,10 @@
# We can choose this benchmark if it matches the driver and CPU/GPU
# architecture.
- matched_driver = (driver_filter is None or
- iree_driver == driver_filter.lower())
- matched_arch = (target_arch == cpu_target_arch or
- target_arch == gpu_target_arch)
+ matched_driver = (
+ driver_filter is None or iree_driver == driver_filter.lower())
+ matched_arch = (
+ target_arch == cpu_target_arch or target_arch == gpu_target_arch)
should_choose = matched_driver and matched_arch
if should_choose:
matched_benchmarks.append(root)
@@ -477,10 +477,11 @@
parser.add_argument("--capture_tarball",
default=None,
help="Path to the tarball for captures")
- parser.add_argument("--no-clean",
- action="store_true",
- help="Do not clean up the temporary directory used for "
- "benchmarking on the Android device")
+ parser.add_argument(
+ "--no-clean",
+ action="store_true",
+ help="Do not clean up the temporary directory used for "
+ "benchmarking on the Android device")
parser.add_argument("--verbose",
action="store_true",
help="Print internal information during execution")
diff --git a/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp b/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
index 19786fd..48cba52 100644
--- a/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/PadLinalgOps.cpp
@@ -81,7 +81,7 @@
(paddingForM > 0 || paddingForK > 0)
? linalg::PadTensorOp::createPadScalarOp(
lhsPaddedType, lhs, lhsPaddingValue, createPadding({0, 0}),
- createPadding({paddingForM, paddingForK}), /*packing=*/false,
+ createPadding({paddingForM, paddingForK}), /*nofold=*/false,
loc, rewriter)
: lhs;
@@ -89,7 +89,7 @@
(paddingForK > 0 || paddingForN > 0)
? linalg::PadTensorOp::createPadScalarOp(
rhsPaddedType, rhs, rhsPaddingValue, createPadding({0, 0}),
- createPadding({paddingForK, paddingForN}), /*packing=*/false,
+ createPadding({paddingForK, paddingForN}), /*nofold=*/false,
loc, rewriter)
: rhs;
@@ -107,7 +107,7 @@
loc, rewriter.getZeroAttr(resultType.getElementType()));
Value paddedResult = linalg::PadTensorOp::createPadScalarOp(
newResultType, result, resultPaddingValue, createPadding({0, 0}),
- createPadding({paddingForM, paddingForN}), /*packing=*/false, loc,
+ createPadding({paddingForM, paddingForN}), /*nofold=*/false, loc,
rewriter);
auto paddedMatmulOp =
cast<linalg::LinalgOp>(matmulOp.getOperation())
diff --git a/iree/compiler/InputConversion/MHLO/MHLOToMHLOPreprocessing.cpp b/iree/compiler/InputConversion/MHLO/MHLOToMHLOPreprocessing.cpp
index 380e4c9..1f5084e 100644
--- a/iree/compiler/InputConversion/MHLO/MHLOToMHLOPreprocessing.cpp
+++ b/iree/compiler/InputConversion/MHLO/MHLOToMHLOPreprocessing.cpp
@@ -141,9 +141,9 @@
paddingHigh.append(rank, 0);
interiorPadding.append(rank, 0);
for (auto iter :
- llvm::enumerate(op.dimension_numbers().input_spatial_dimensions())) {
+ llvm::enumerate(op.dimension_numbers().getInputSpatialDimensions())) {
unsigned idx = iter.index();
- unsigned dim = iter.value().getZExtValue();
+ unsigned dim = iter.value();
paddingLow[dim] = op.paddingAttr().getValue<int64_t>({idx, 0});
paddingHigh[dim] = op.paddingAttr().getValue<int64_t>({idx, 1});
}
@@ -195,19 +195,13 @@
}
auto dimensionNumbers = op.dimension_numbers();
- auto inputSpatialDimensions = dimensionNumbers.input_spatial_dimensions();
- llvm::SmallVector<int64_t, 4> spatialDims;
- for (auto dim : inputSpatialDimensions) {
- spatialDims.push_back(dim.getSExtValue());
- }
+ auto spatialDims = dimensionNumbers.getInputSpatialDimensions();
// Compute the permutation required to create a standard order.
llvm::SmallVector<int64_t, 4> permutations;
- permutations.push_back(
- dimensionNumbers.input_batch_dimension().getValue().getSExtValue());
+ permutations.push_back(dimensionNumbers.getInputBatchDimension());
permutations.append(spatialDims.begin(), spatialDims.end());
- permutations.push_back(
- dimensionNumbers.input_feature_dimension().getValue().getSExtValue());
+ permutations.push_back(dimensionNumbers.getInputFeatureDimension());
// If the permutation is iota then no reordering is required.
if (isIota(permutations)) {
@@ -227,18 +221,17 @@
llvm::SmallVector<int64_t, 4> newSpatialDimensions(spatialDims.size());
std::iota(newSpatialDimensions.begin(), newSpatialDimensions.end(), 1);
- auto newDimensionNumbers = mhlo::ConvDimensionNumbers::get(
- /*input_batch_dimension=*/rewriter.getI64IntegerAttr(0),
- /*input_feature_dimension=*/
- rewriter.getI64IntegerAttr(newSpatialDimensions.size() + 1),
- /*input_spatial_dimensions=*/
- rewriter.getI64TensorAttr(newSpatialDimensions),
- dimensionNumbers.kernel_input_feature_dimension(),
- dimensionNumbers.kernel_output_feature_dimension(),
- dimensionNumbers.kernel_spatial_dimensions(),
- dimensionNumbers.output_batch_dimension(),
- dimensionNumbers.output_feature_dimension(),
- dimensionNumbers.output_spatial_dimensions(), op.getContext());
+ auto newDimensionNumbers = mhlo::ConvDimensionNumbersAttr::get(
+ op.getContext(),
+ /*input_batch_dimension=*/0,
+ /*input_feature_dimension=*/newSpatialDimensions.size() + 1,
+ /*input_spatial_dimensions=*/newSpatialDimensions,
+ dimensionNumbers.getKernelInputFeatureDimension(),
+ dimensionNumbers.getKernelOutputFeatureDimension(),
+ dimensionNumbers.getKernelSpatialDimensions(),
+ dimensionNumbers.getOutputBatchDimension(),
+ dimensionNumbers.getOutputFeatureDimension(),
+ dimensionNumbers.getOutputSpatialDimensions());
SmallVector<Value, 2> operands = {transposed, op.rhs()};
auto newConv = rewriter.create<mhlo::ConvOp>(op.getLoc(), op.getType(),
@@ -261,19 +254,16 @@
auto dimensionNumbers = op.dimension_numbers();
- auto inputSpatialDimensions = dimensionNumbers.kernel_spatial_dimensions();
- llvm::SmallVector<int64_t, 4> spatialDims;
- for (auto dim : inputSpatialDimensions) {
- spatialDims.push_back(dim.getSExtValue());
- }
+ auto spatialDims = dimensionNumbers.getKernelSpatialDimensions();
auto inputFeatureDimension =
- dimensionNumbers.kernel_input_feature_dimension().getInt();
+ dimensionNumbers.getKernelInputFeatureDimension();
auto outputFeatureDimension =
- dimensionNumbers.kernel_output_feature_dimension().getInt();
+ dimensionNumbers.getKernelOutputFeatureDimension();
// Compute the permutation for the transpose.
- llvm::SmallVector<int64_t, 4> permutation(spatialDims);
+ llvm::SmallVector<int64_t, 4> permutation(spatialDims.begin(),
+ spatialDims.end());
permutation.push_back(inputFeatureDimension);
permutation.push_back(outputFeatureDimension);
@@ -293,18 +283,17 @@
RankedTensorType::get(transposeShape, kernelType.getElementType()),
kernel, rewriter.getI64TensorAttr(permutation));
- auto newDimensionNumbers = mhlo::ConvDimensionNumbers::get(
- dimensionNumbers.input_batch_dimension(),
- dimensionNumbers.input_feature_dimension(),
- dimensionNumbers.input_spatial_dimensions(),
+ auto newDimensionNumbers = mhlo::ConvDimensionNumbersAttr::get(
+ op.getContext(), dimensionNumbers.getInputBatchDimension(),
+ dimensionNumbers.getInputFeatureDimension(),
+ dimensionNumbers.getInputSpatialDimensions(),
/*kernel_input_feature_dimension=*/
- rewriter.getI64IntegerAttr(newSpatialDimensions.size()),
+ newSpatialDimensions.size(),
/*kernel_output_feature_dimension=*/
- rewriter.getI64IntegerAttr(newSpatialDimensions.size() + 1),
- rewriter.getI64TensorAttr(newSpatialDimensions),
- dimensionNumbers.output_batch_dimension(),
- dimensionNumbers.output_feature_dimension(),
- dimensionNumbers.output_spatial_dimensions(), op.getContext());
+ newSpatialDimensions.size() + 1, newSpatialDimensions,
+ dimensionNumbers.getOutputBatchDimension(),
+ dimensionNumbers.getOutputFeatureDimension(),
+ dimensionNumbers.getOutputSpatialDimensions());
SmallVector<Value, 2> operands = {op.lhs(), transposeKernel};
mhlo::ConvOp newConv = rewriter.create<mhlo::ConvOp>(
@@ -330,19 +319,13 @@
}
auto dimensionNumbers = op.dimension_numbers();
- auto outputSpatialDimensions = dimensionNumbers.output_spatial_dimensions();
- llvm::SmallVector<int64_t, 4> spatialDims;
- for (auto dim : outputSpatialDimensions) {
- spatialDims.push_back(dim.getSExtValue());
- }
+ auto spatialDims = dimensionNumbers.getOutputSpatialDimensions();
// Compute the permutation to transpose to an ordered output.
llvm::SmallVector<int64_t, 4> permutation;
- permutation.push_back(
- dimensionNumbers.output_batch_dimension().getValue().getSExtValue());
+ permutation.push_back(dimensionNumbers.getOutputBatchDimension());
permutation.append(spatialDims.begin(), spatialDims.end());
- permutation.push_back(
- dimensionNumbers.output_feature_dimension().getValue().getSExtValue());
+ permutation.push_back(dimensionNumbers.getOutputFeatureDimension());
// If the permutation is iota then no reordering is required.
if (isIota(permutation)) {
@@ -364,18 +347,16 @@
llvm::SmallVector<int64_t, 4> newSpatialDimensions(spatialDims.size());
std::iota(newSpatialDimensions.begin(), newSpatialDimensions.end(), 1);
- auto newDimensionNumbers = mhlo::ConvDimensionNumbers::get(
- dimensionNumbers.input_batch_dimension(),
- dimensionNumbers.input_feature_dimension(),
- dimensionNumbers.input_spatial_dimensions(),
- dimensionNumbers.kernel_input_feature_dimension(),
- dimensionNumbers.kernel_output_feature_dimension(),
- dimensionNumbers.kernel_spatial_dimensions(),
- /*output_batch_dimension=*/rewriter.getI64IntegerAttr(0),
- /*output_feature_dimension=*/
- rewriter.getI64IntegerAttr(newSpatialDimensions.size() + 1),
- /*output_spatial_dimensions=*/
- rewriter.getI64TensorAttr(newSpatialDimensions), op.getContext());
+ auto newDimensionNumbers = mhlo::ConvDimensionNumbersAttr::get(
+ op.getContext(), dimensionNumbers.getInputBatchDimension(),
+ dimensionNumbers.getInputFeatureDimension(),
+ dimensionNumbers.getInputSpatialDimensions(),
+ dimensionNumbers.getKernelInputFeatureDimension(),
+ dimensionNumbers.getKernelOutputFeatureDimension(),
+ dimensionNumbers.getKernelSpatialDimensions(),
+ /*output_batch_dimension=*/0,
+ /*output_feature_dimension=*/newSpatialDimensions.size() + 1,
+ /*output_spatial_dimensions=*/newSpatialDimensions);
SmallVector<Value, 2> operands = {op.lhs(), op.rhs()};
auto newConv = rewriter.create<mhlo::ConvOp>(
@@ -464,10 +445,10 @@
LogicalResult matchAndRewrite(mhlo::ConvOp op,
PatternRewriter &rewriter) const override {
- const auto featureInDim =
- op.dimension_numbers().kernel_input_feature_dimension().getInt();
- const auto featureOutDim =
- op.dimension_numbers().kernel_output_feature_dimension().getInt();
+ int64_t featureInDim =
+ op.dimension_numbers().getKernelInputFeatureDimension();
+ int64_t featureOutDim =
+ op.dimension_numbers().getKernelOutputFeatureDimension();
const auto &kernelShape = op.rhs().getType().cast<ShapedType>().getShape();
if (kernelShape[featureInDim] != 1) return failure();
diff --git a/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir b/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
index fbe2a7b..0ae429e 100644
--- a/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
+++ b/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
@@ -38,16 +38,17 @@
%0 = "mhlo.reshape"(%arg1) : (tensor<2x2x2x3xf32>) -> tensor<2x2x1x6xf32>
%1 = "mhlo.convolution"(%arg0, %0) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 2 : i64,
padding = dense<0> : tensor<2x2xi64>,
rhs_dilation = dense<1> : tensor<2xi64>,
diff --git a/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing_extract_pad_from_conv.mlir b/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing_extract_pad_from_conv.mlir
index 686fd4e..c3a4f6a 100644
--- a/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing_extract_pad_from_conv.mlir
+++ b/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing_extract_pad_from_conv.mlir
@@ -9,16 +9,17 @@
func @conv(%inputs: tensor<1x4x5x2xf32>, %weights: tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32> {
%0 = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
rhs_dilation = dense<1> : tensor<2xi64>,
diff --git a/iree/test/e2e/models/edge_detection.mlir b/iree/test/e2e/models/edge_detection.mlir
index 5fbf4d2..6e65f21 100644
--- a/iree/test/e2e/models/edge_detection.mlir
+++ b/iree/test/e2e/models/edge_detection.mlir
@@ -12,9 +12,9 @@
func @edge_detect_sobel_operator(%arg0: tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32> {
%0 = mhlo.constant dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]> : tensor<3x3x1x1xf32>
%1 = mhlo.constant dense<[[[[1.000000e+00]], [[2.000000e+00]], [[1.000000e+00]]], [[[0.000000e+00]], [[0.000000e+00]], [[0.000000e+00]]], [[[-1.000000e+00]], [[-2.000000e+00]], [[-1.000000e+00]]]]> : tensor<3x3x1x1xf32>
- %2 = "mhlo.convolution"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
+ %2 = "mhlo.convolution"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
%3 = mhlo.multiply %2, %2 : tensor<1x128x128x1xf32>
- %4 = "mhlo.convolution"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
+ %4 = "mhlo.convolution"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
%5 = mhlo.multiply %4, %4 : tensor<1x128x128x1xf32>
%6 = mhlo.add %3, %5 : tensor<1x128x128x1xf32>
%7 = "mhlo.sqrt"(%6) : (tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32>
diff --git a/iree/test/e2e/models/mobilenetv3_fake_weights.mlir b/iree/test/e2e/models/mobilenetv3_fake_weights.mlir
index 5bcdccc..d6aae0f 100644
--- a/iree/test/e2e/models/mobilenetv3_fake_weights.mlir
+++ b/iree/test/e2e/models/mobilenetv3_fake_weights.mlir
@@ -690,7 +690,7 @@
%476 = util.global.load.indirect %208 : !util.ptr<tensor<1x1x1024x1000xf32>> -> tensor<1x1x1024x1000xf32>
%477 = mhlo.multiply %arg0, %210 : tensor<1x224x224x3xf32>
%478 = mhlo.add %477, %211 : tensor<1x224x224x3xf32>
- %479 = "mhlo.convolution"(%478, %278) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<[[0, 1], [0, 1]]> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x224x224x3xf32>, tensor<3x3x3x16xf32>) -> tensor<1x112x112x16xf32>
+ %479 = "mhlo.convolution"(%478, %278) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<[[0, 1], [0, 1]]> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x224x224x3xf32>, tensor<3x3x3x16xf32>) -> tensor<1x112x112x16xf32>
%480 = "mhlo.batch_norm_inference"(%479, %277, %276, %275, %274) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x112x112x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x112x112x16xf32>
%481 = mhlo.add %480, %212 : tensor<1x112x112x16xf32>
%482 = "mhlo.clamp"(%266, %481, %264) : (tensor<f32>, tensor<1x112x112x16xf32>, tensor<f32>) -> tensor<1x112x112x16xf32>
@@ -698,7 +698,7 @@
%484 = mhlo.multiply %483, %480 : tensor<1x112x112x16xf32>
%485 = "mhlo.pad"(%484, %266) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x112x112x16xf32>, tensor<f32>) -> tensor<1x113x113x16xf32>
%486 = "mhlo.reshape"(%465) : (tensor<3x3x16x1xf32>) -> tensor<3x3x1x16xf32>
- %487 = "mhlo.convolution"(%485, %486) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 16 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x113x113x16xf32>, tensor<3x3x1x16xf32>) -> tensor<1x56x56x16xf32>
+ %487 = "mhlo.convolution"(%485, %486) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 16 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x113x113x16xf32>, tensor<3x3x1x16xf32>) -> tensor<1x56x56x16xf32>
%488 = "mhlo.batch_norm_inference"(%487, %464, %463, %462, %461) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x56x56x16xf32>
%489 = mhlo.maximum %488, %246 : tensor<1x56x56x16xf32>
%490 = "mhlo.reduce"(%489, %266) ( {
@@ -708,11 +708,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x56x56x16xf32>, tensor<f32>) -> tensor<1x16xf32>
%491 = mhlo.divide %490, %247 : tensor<1x16xf32>
%492 = "mhlo.reshape"(%491) : (tensor<1x16xf32>) -> tensor<1x1x1x16xf32>
- %493 = "mhlo.convolution"(%492, %474) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x16xf32>, tensor<1x1x16x8xf32>) -> tensor<1x1x1x8xf32>
+ %493 = "mhlo.convolution"(%492, %474) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x16xf32>, tensor<1x1x16x8xf32>) -> tensor<1x1x1x8xf32>
%494 = "mhlo.broadcast_in_dim"(%473) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<8xf32>) -> tensor<1x1x1x8xf32>
%495 = mhlo.add %493, %494 : tensor<1x1x1x8xf32>
%496 = mhlo.maximum %495, %248 : tensor<1x1x1x8xf32>
- %497 = "mhlo.convolution"(%496, %472) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x8xf32>, tensor<1x1x8x16xf32>) -> tensor<1x1x1x16xf32>
+ %497 = "mhlo.convolution"(%496, %472) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x8xf32>, tensor<1x1x8x16xf32>) -> tensor<1x1x1x16xf32>
%498 = "mhlo.broadcast_in_dim"(%471) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<16xf32>) -> tensor<1x1x1x16xf32>
%499 = mhlo.add %497, %498 : tensor<1x1x1x16xf32>
%500 = mhlo.add %499, %213 : tensor<1x1x1x16xf32>
@@ -720,29 +720,29 @@
%502 = mhlo.multiply %501, %230 : tensor<1x1x1x16xf32>
%503 = "mhlo.broadcast_in_dim"(%502) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x16xf32>) -> tensor<1x56x56x16xf32>
%504 = mhlo.multiply %489, %503 : tensor<1x56x56x16xf32>
- %505 = "mhlo.convolution"(%504, %470) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x16xf32>, tensor<1x1x16x16xf32>) -> tensor<1x56x56x16xf32>
+ %505 = "mhlo.convolution"(%504, %470) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x16xf32>, tensor<1x1x16x16xf32>) -> tensor<1x56x56x16xf32>
%506 = "mhlo.batch_norm_inference"(%505, %469, %468, %467, %466) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<1x56x56x16xf32>
- %507 = "mhlo.convolution"(%506, %307) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x16xf32>, tensor<1x1x16x72xf32>) -> tensor<1x56x56x72xf32>
+ %507 = "mhlo.convolution"(%506, %307) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x16xf32>, tensor<1x1x16x72xf32>) -> tensor<1x56x56x72xf32>
%508 = "mhlo.batch_norm_inference"(%507, %306, %305, %304, %303) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x56x56x72xf32>, tensor<72xf32>, tensor<72xf32>, tensor<72xf32>, tensor<72xf32>) -> tensor<1x56x56x72xf32>
%509 = mhlo.maximum %508, %249 : tensor<1x56x56x72xf32>
%510 = "mhlo.pad"(%509, %266) {edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>, edge_padding_low = dense<0> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x56x56x72xf32>, tensor<f32>) -> tensor<1x57x57x72xf32>
%511 = "mhlo.reshape"(%302) : (tensor<3x3x72x1xf32>) -> tensor<3x3x1x72xf32>
- %512 = "mhlo.convolution"(%510, %511) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 72 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x57x57x72xf32>, tensor<3x3x1x72xf32>) -> tensor<1x28x28x72xf32>
+ %512 = "mhlo.convolution"(%510, %511) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 72 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x57x57x72xf32>, tensor<3x3x1x72xf32>) -> tensor<1x28x28x72xf32>
%513 = "mhlo.batch_norm_inference"(%512, %301, %300, %299, %298) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x72xf32>, tensor<72xf32>, tensor<72xf32>, tensor<72xf32>, tensor<72xf32>) -> tensor<1x28x28x72xf32>
%514 = mhlo.maximum %513, %250 : tensor<1x28x28x72xf32>
- %515 = "mhlo.convolution"(%514, %312) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x72xf32>, tensor<1x1x72x24xf32>) -> tensor<1x28x28x24xf32>
+ %515 = "mhlo.convolution"(%514, %312) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x72xf32>, tensor<1x1x72x24xf32>) -> tensor<1x28x28x24xf32>
%516 = "mhlo.batch_norm_inference"(%515, %311, %310, %309, %308) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x28x28x24xf32>
- %517 = "mhlo.convolution"(%516, %322) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x24xf32>, tensor<1x1x24x88xf32>) -> tensor<1x28x28x88xf32>
+ %517 = "mhlo.convolution"(%516, %322) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x24xf32>, tensor<1x1x24x88xf32>) -> tensor<1x28x28x88xf32>
%518 = "mhlo.batch_norm_inference"(%517, %321, %320, %319, %318) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x88xf32>, tensor<88xf32>, tensor<88xf32>, tensor<88xf32>, tensor<88xf32>) -> tensor<1x28x28x88xf32>
%519 = mhlo.maximum %518, %251 : tensor<1x28x28x88xf32>
%520 = "mhlo.reshape"(%317) : (tensor<3x3x88x1xf32>) -> tensor<3x3x1x88xf32>
- %521 = "mhlo.convolution"(%519, %520) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 88 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x88xf32>, tensor<3x3x1x88xf32>) -> tensor<1x28x28x88xf32>
+ %521 = "mhlo.convolution"(%519, %520) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 88 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x88xf32>, tensor<3x3x1x88xf32>) -> tensor<1x28x28x88xf32>
%522 = "mhlo.batch_norm_inference"(%521, %316, %315, %314, %313) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x88xf32>, tensor<88xf32>, tensor<88xf32>, tensor<88xf32>, tensor<88xf32>) -> tensor<1x28x28x88xf32>
%523 = mhlo.maximum %522, %251 : tensor<1x28x28x88xf32>
- %524 = "mhlo.convolution"(%523, %327) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x88xf32>, tensor<1x1x88x24xf32>) -> tensor<1x28x28x24xf32>
+ %524 = "mhlo.convolution"(%523, %327) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x88xf32>, tensor<1x1x88x24xf32>) -> tensor<1x28x28x24xf32>
%525 = "mhlo.batch_norm_inference"(%524, %326, %325, %324, %323) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>, tensor<24xf32>) -> tensor<1x28x28x24xf32>
%526 = mhlo.add %516, %525 : tensor<1x28x28x24xf32>
- %527 = "mhlo.convolution"(%526, %337) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x24xf32>, tensor<1x1x24x96xf32>) -> tensor<1x28x28x96xf32>
+ %527 = "mhlo.convolution"(%526, %337) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x24xf32>, tensor<1x1x24x96xf32>) -> tensor<1x28x28x96xf32>
%528 = "mhlo.batch_norm_inference"(%527, %336, %335, %334, %333) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x28x28x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x28x28x96xf32>
%529 = mhlo.add %528, %214 : tensor<1x28x28x96xf32>
%530 = "mhlo.clamp"(%266, %529, %264) : (tensor<f32>, tensor<1x28x28x96xf32>, tensor<f32>) -> tensor<1x28x28x96xf32>
@@ -750,7 +750,7 @@
%532 = mhlo.multiply %531, %528 : tensor<1x28x28x96xf32>
%533 = "mhlo.pad"(%532, %266) {edge_padding_high = dense<[0, 2, 2, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x28x28x96xf32>, tensor<f32>) -> tensor<1x31x31x96xf32>
%534 = "mhlo.reshape"(%332) : (tensor<5x5x96x1xf32>) -> tensor<5x5x1x96xf32>
- %535 = "mhlo.convolution"(%533, %534) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 96 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x31x31x96xf32>, tensor<5x5x1x96xf32>) -> tensor<1x14x14x96xf32>
+ %535 = "mhlo.convolution"(%533, %534) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 96 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x31x31x96xf32>, tensor<5x5x1x96xf32>) -> tensor<1x14x14x96xf32>
%536 = "mhlo.batch_norm_inference"(%535, %331, %330, %329, %328) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x14x14x96xf32>
%537 = mhlo.add %536, %215 : tensor<1x14x14x96xf32>
%538 = "mhlo.clamp"(%266, %537, %264) : (tensor<f32>, tensor<1x14x14x96xf32>, tensor<f32>) -> tensor<1x14x14x96xf32>
@@ -763,11 +763,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x14x14x96xf32>, tensor<f32>) -> tensor<1x96xf32>
%542 = mhlo.divide %541, %252 : tensor<1x96xf32>
%543 = "mhlo.reshape"(%542) : (tensor<1x96xf32>) -> tensor<1x1x1x96xf32>
- %544 = "mhlo.convolution"(%543, %346) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x1x1x24xf32>
+ %544 = "mhlo.convolution"(%543, %346) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x96xf32>, tensor<1x1x96x24xf32>) -> tensor<1x1x1x24xf32>
%545 = "mhlo.broadcast_in_dim"(%345) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<24xf32>) -> tensor<1x1x1x24xf32>
%546 = mhlo.add %544, %545 : tensor<1x1x1x24xf32>
%547 = mhlo.maximum %546, %253 : tensor<1x1x1x24xf32>
- %548 = "mhlo.convolution"(%547, %344) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x24xf32>, tensor<1x1x24x96xf32>) -> tensor<1x1x1x96xf32>
+ %548 = "mhlo.convolution"(%547, %344) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x24xf32>, tensor<1x1x24x96xf32>) -> tensor<1x1x1x96xf32>
%549 = "mhlo.broadcast_in_dim"(%343) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<96xf32>) -> tensor<1x1x1x96xf32>
%550 = mhlo.add %548, %549 : tensor<1x1x1x96xf32>
%551 = mhlo.add %550, %216 : tensor<1x1x1x96xf32>
@@ -775,16 +775,16 @@
%553 = mhlo.multiply %552, %233 : tensor<1x1x1x96xf32>
%554 = "mhlo.broadcast_in_dim"(%553) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x96xf32>) -> tensor<1x14x14x96xf32>
%555 = mhlo.multiply %540, %554 : tensor<1x14x14x96xf32>
- %556 = "mhlo.convolution"(%555, %342) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x96xf32>, tensor<1x1x96x40xf32>) -> tensor<1x14x14x40xf32>
+ %556 = "mhlo.convolution"(%555, %342) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x96xf32>, tensor<1x1x96x40xf32>) -> tensor<1x14x14x40xf32>
%557 = "mhlo.batch_norm_inference"(%556, %341, %340, %339, %338) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>) -> tensor<1x14x14x40xf32>
- %558 = "mhlo.convolution"(%557, %356) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x240xf32>) -> tensor<1x14x14x240xf32>
+ %558 = "mhlo.convolution"(%557, %356) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x240xf32>) -> tensor<1x14x14x240xf32>
%559 = "mhlo.batch_norm_inference"(%558, %355, %354, %353, %352) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>) -> tensor<1x14x14x240xf32>
%560 = mhlo.add %559, %217 : tensor<1x14x14x240xf32>
%561 = "mhlo.clamp"(%266, %560, %264) : (tensor<f32>, tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x14x14x240xf32>
%562 = mhlo.multiply %561, %234 : tensor<1x14x14x240xf32>
%563 = mhlo.multiply %562, %559 : tensor<1x14x14x240xf32>
%564 = "mhlo.reshape"(%351) : (tensor<5x5x240x1xf32>) -> tensor<5x5x1x240xf32>
- %565 = "mhlo.convolution"(%563, %564) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 240 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<5x5x1x240xf32>) -> tensor<1x14x14x240xf32>
+ %565 = "mhlo.convolution"(%563, %564) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 240 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<5x5x1x240xf32>) -> tensor<1x14x14x240xf32>
%566 = "mhlo.batch_norm_inference"(%565, %350, %349, %348, %347) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>) -> tensor<1x14x14x240xf32>
%567 = mhlo.add %566, %217 : tensor<1x14x14x240xf32>
%568 = "mhlo.clamp"(%266, %567, %264) : (tensor<f32>, tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x14x14x240xf32>
@@ -797,11 +797,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x240xf32>
%572 = mhlo.divide %571, %254 : tensor<1x240xf32>
%573 = "mhlo.reshape"(%572) : (tensor<1x240xf32>) -> tensor<1x1x1x240xf32>
- %574 = "mhlo.convolution"(%573, %365) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x240xf32>, tensor<1x1x240x64xf32>) -> tensor<1x1x1x64xf32>
+ %574 = "mhlo.convolution"(%573, %365) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x240xf32>, tensor<1x1x240x64xf32>) -> tensor<1x1x1x64xf32>
%575 = "mhlo.broadcast_in_dim"(%364) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x1x1x64xf32>
%576 = mhlo.add %574, %575 : tensor<1x1x1x64xf32>
%577 = mhlo.maximum %576, %255 : tensor<1x1x1x64xf32>
- %578 = "mhlo.convolution"(%577, %363) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x64xf32>, tensor<1x1x64x240xf32>) -> tensor<1x1x1x240xf32>
+ %578 = "mhlo.convolution"(%577, %363) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x64xf32>, tensor<1x1x64x240xf32>) -> tensor<1x1x1x240xf32>
%579 = "mhlo.broadcast_in_dim"(%362) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<240xf32>) -> tensor<1x1x1x240xf32>
%580 = mhlo.add %578, %579 : tensor<1x1x1x240xf32>
%581 = mhlo.add %580, %218 : tensor<1x1x1x240xf32>
@@ -809,17 +809,17 @@
%583 = mhlo.multiply %582, %235 : tensor<1x1x1x240xf32>
%584 = "mhlo.broadcast_in_dim"(%583) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x240xf32>) -> tensor<1x14x14x240xf32>
%585 = mhlo.multiply %570, %584 : tensor<1x14x14x240xf32>
- %586 = "mhlo.convolution"(%585, %361) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<1x1x240x40xf32>) -> tensor<1x14x14x40xf32>
+ %586 = "mhlo.convolution"(%585, %361) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<1x1x240x40xf32>) -> tensor<1x14x14x40xf32>
%587 = "mhlo.batch_norm_inference"(%586, %360, %359, %358, %357) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>) -> tensor<1x14x14x40xf32>
%588 = mhlo.add %557, %587 : tensor<1x14x14x40xf32>
- %589 = "mhlo.convolution"(%588, %375) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x240xf32>) -> tensor<1x14x14x240xf32>
+ %589 = "mhlo.convolution"(%588, %375) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x240xf32>) -> tensor<1x14x14x240xf32>
%590 = "mhlo.batch_norm_inference"(%589, %374, %373, %372, %371) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>) -> tensor<1x14x14x240xf32>
%591 = mhlo.add %590, %217 : tensor<1x14x14x240xf32>
%592 = "mhlo.clamp"(%266, %591, %264) : (tensor<f32>, tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x14x14x240xf32>
%593 = mhlo.multiply %592, %234 : tensor<1x14x14x240xf32>
%594 = mhlo.multiply %593, %590 : tensor<1x14x14x240xf32>
%595 = "mhlo.reshape"(%370) : (tensor<5x5x240x1xf32>) -> tensor<5x5x1x240xf32>
- %596 = "mhlo.convolution"(%594, %595) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 240 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<5x5x1x240xf32>) -> tensor<1x14x14x240xf32>
+ %596 = "mhlo.convolution"(%594, %595) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 240 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<5x5x1x240xf32>) -> tensor<1x14x14x240xf32>
%597 = "mhlo.batch_norm_inference"(%596, %369, %368, %367, %366) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>, tensor<240xf32>) -> tensor<1x14x14x240xf32>
%598 = mhlo.add %597, %217 : tensor<1x14x14x240xf32>
%599 = "mhlo.clamp"(%266, %598, %264) : (tensor<f32>, tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x14x14x240xf32>
@@ -832,11 +832,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<f32>) -> tensor<1x240xf32>
%603 = mhlo.divide %602, %254 : tensor<1x240xf32>
%604 = "mhlo.reshape"(%603) : (tensor<1x240xf32>) -> tensor<1x1x1x240xf32>
- %605 = "mhlo.convolution"(%604, %384) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x240xf32>, tensor<1x1x240x64xf32>) -> tensor<1x1x1x64xf32>
+ %605 = "mhlo.convolution"(%604, %384) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x240xf32>, tensor<1x1x240x64xf32>) -> tensor<1x1x1x64xf32>
%606 = "mhlo.broadcast_in_dim"(%383) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x1x1x64xf32>
%607 = mhlo.add %605, %606 : tensor<1x1x1x64xf32>
%608 = mhlo.maximum %607, %255 : tensor<1x1x1x64xf32>
- %609 = "mhlo.convolution"(%608, %382) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x64xf32>, tensor<1x1x64x240xf32>) -> tensor<1x1x1x240xf32>
+ %609 = "mhlo.convolution"(%608, %382) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x64xf32>, tensor<1x1x64x240xf32>) -> tensor<1x1x1x240xf32>
%610 = "mhlo.broadcast_in_dim"(%381) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<240xf32>) -> tensor<1x1x1x240xf32>
%611 = mhlo.add %609, %610 : tensor<1x1x1x240xf32>
%612 = mhlo.add %611, %218 : tensor<1x1x1x240xf32>
@@ -844,17 +844,17 @@
%614 = mhlo.multiply %613, %235 : tensor<1x1x1x240xf32>
%615 = "mhlo.broadcast_in_dim"(%614) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x240xf32>) -> tensor<1x14x14x240xf32>
%616 = mhlo.multiply %601, %615 : tensor<1x14x14x240xf32>
- %617 = "mhlo.convolution"(%616, %380) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<1x1x240x40xf32>) -> tensor<1x14x14x40xf32>
+ %617 = "mhlo.convolution"(%616, %380) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x240xf32>, tensor<1x1x240x40xf32>) -> tensor<1x14x14x40xf32>
%618 = "mhlo.batch_norm_inference"(%617, %379, %378, %377, %376) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>, tensor<40xf32>) -> tensor<1x14x14x40xf32>
%619 = mhlo.add %588, %618 : tensor<1x14x14x40xf32>
- %620 = "mhlo.convolution"(%619, %394) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x120xf32>) -> tensor<1x14x14x120xf32>
+ %620 = "mhlo.convolution"(%619, %394) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x40xf32>, tensor<1x1x40x120xf32>) -> tensor<1x14x14x120xf32>
%621 = "mhlo.batch_norm_inference"(%620, %393, %392, %391, %390) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x120xf32>, tensor<120xf32>, tensor<120xf32>, tensor<120xf32>, tensor<120xf32>) -> tensor<1x14x14x120xf32>
%622 = mhlo.add %621, %219 : tensor<1x14x14x120xf32>
%623 = "mhlo.clamp"(%266, %622, %264) : (tensor<f32>, tensor<1x14x14x120xf32>, tensor<f32>) -> tensor<1x14x14x120xf32>
%624 = mhlo.multiply %623, %236 : tensor<1x14x14x120xf32>
%625 = mhlo.multiply %624, %621 : tensor<1x14x14x120xf32>
%626 = "mhlo.reshape"(%389) : (tensor<5x5x120x1xf32>) -> tensor<5x5x1x120xf32>
- %627 = "mhlo.convolution"(%625, %626) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 120 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x120xf32>, tensor<5x5x1x120xf32>) -> tensor<1x14x14x120xf32>
+ %627 = "mhlo.convolution"(%625, %626) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 120 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x120xf32>, tensor<5x5x1x120xf32>) -> tensor<1x14x14x120xf32>
%628 = "mhlo.batch_norm_inference"(%627, %388, %387, %386, %385) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x120xf32>, tensor<120xf32>, tensor<120xf32>, tensor<120xf32>, tensor<120xf32>) -> tensor<1x14x14x120xf32>
%629 = mhlo.add %628, %219 : tensor<1x14x14x120xf32>
%630 = "mhlo.clamp"(%266, %629, %264) : (tensor<f32>, tensor<1x14x14x120xf32>, tensor<f32>) -> tensor<1x14x14x120xf32>
@@ -867,11 +867,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x14x14x120xf32>, tensor<f32>) -> tensor<1x120xf32>
%634 = mhlo.divide %633, %256 : tensor<1x120xf32>
%635 = "mhlo.reshape"(%634) : (tensor<1x120xf32>) -> tensor<1x1x1x120xf32>
- %636 = "mhlo.convolution"(%635, %403) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x120xf32>, tensor<1x1x120x32xf32>) -> tensor<1x1x1x32xf32>
+ %636 = "mhlo.convolution"(%635, %403) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x120xf32>, tensor<1x1x120x32xf32>) -> tensor<1x1x1x32xf32>
%637 = "mhlo.broadcast_in_dim"(%402) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<32xf32>) -> tensor<1x1x1x32xf32>
%638 = mhlo.add %636, %637 : tensor<1x1x1x32xf32>
%639 = mhlo.maximum %638, %257 : tensor<1x1x1x32xf32>
- %640 = "mhlo.convolution"(%639, %401) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x32xf32>, tensor<1x1x32x120xf32>) -> tensor<1x1x1x120xf32>
+ %640 = "mhlo.convolution"(%639, %401) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x32xf32>, tensor<1x1x32x120xf32>) -> tensor<1x1x1x120xf32>
%641 = "mhlo.broadcast_in_dim"(%400) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<120xf32>) -> tensor<1x1x1x120xf32>
%642 = mhlo.add %640, %641 : tensor<1x1x1x120xf32>
%643 = mhlo.add %642, %220 : tensor<1x1x1x120xf32>
@@ -879,16 +879,16 @@
%645 = mhlo.multiply %644, %237 : tensor<1x1x1x120xf32>
%646 = "mhlo.broadcast_in_dim"(%645) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x120xf32>) -> tensor<1x14x14x120xf32>
%647 = mhlo.multiply %632, %646 : tensor<1x14x14x120xf32>
- %648 = "mhlo.convolution"(%647, %399) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x120xf32>, tensor<1x1x120x48xf32>) -> tensor<1x14x14x48xf32>
+ %648 = "mhlo.convolution"(%647, %399) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x120xf32>, tensor<1x1x120x48xf32>) -> tensor<1x14x14x48xf32>
%649 = "mhlo.batch_norm_inference"(%648, %398, %397, %396, %395) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x48xf32>, tensor<48xf32>, tensor<48xf32>, tensor<48xf32>, tensor<48xf32>) -> tensor<1x14x14x48xf32>
- %650 = "mhlo.convolution"(%649, %413) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x48xf32>, tensor<1x1x48x144xf32>) -> tensor<1x14x14x144xf32>
+ %650 = "mhlo.convolution"(%649, %413) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x48xf32>, tensor<1x1x48x144xf32>) -> tensor<1x14x14x144xf32>
%651 = "mhlo.batch_norm_inference"(%650, %412, %411, %410, %409) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x14x14x144xf32>
%652 = mhlo.add %651, %221 : tensor<1x14x14x144xf32>
%653 = "mhlo.clamp"(%266, %652, %264) : (tensor<f32>, tensor<1x14x14x144xf32>, tensor<f32>) -> tensor<1x14x14x144xf32>
%654 = mhlo.multiply %653, %238 : tensor<1x14x14x144xf32>
%655 = mhlo.multiply %654, %651 : tensor<1x14x14x144xf32>
%656 = "mhlo.reshape"(%408) : (tensor<5x5x144x1xf32>) -> tensor<5x5x1x144xf32>
- %657 = "mhlo.convolution"(%655, %656) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 144 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x144xf32>, tensor<5x5x1x144xf32>) -> tensor<1x14x14x144xf32>
+ %657 = "mhlo.convolution"(%655, %656) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 144 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x144xf32>, tensor<5x5x1x144xf32>) -> tensor<1x14x14x144xf32>
%658 = "mhlo.batch_norm_inference"(%657, %407, %406, %405, %404) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>, tensor<144xf32>) -> tensor<1x14x14x144xf32>
%659 = mhlo.add %658, %221 : tensor<1x14x14x144xf32>
%660 = "mhlo.clamp"(%266, %659, %264) : (tensor<f32>, tensor<1x14x14x144xf32>, tensor<f32>) -> tensor<1x14x14x144xf32>
@@ -901,11 +901,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x14x14x144xf32>, tensor<f32>) -> tensor<1x144xf32>
%664 = mhlo.divide %663, %258 : tensor<1x144xf32>
%665 = "mhlo.reshape"(%664) : (tensor<1x144xf32>) -> tensor<1x1x1x144xf32>
- %666 = "mhlo.convolution"(%665, %422) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x40xf32>) -> tensor<1x1x1x40xf32>
+ %666 = "mhlo.convolution"(%665, %422) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x40xf32>) -> tensor<1x1x1x40xf32>
%667 = "mhlo.broadcast_in_dim"(%421) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<40xf32>) -> tensor<1x1x1x40xf32>
%668 = mhlo.add %666, %667 : tensor<1x1x1x40xf32>
%669 = mhlo.maximum %668, %259 : tensor<1x1x1x40xf32>
- %670 = "mhlo.convolution"(%669, %420) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x40xf32>, tensor<1x1x40x144xf32>) -> tensor<1x1x1x144xf32>
+ %670 = "mhlo.convolution"(%669, %420) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x40xf32>, tensor<1x1x40x144xf32>) -> tensor<1x1x1x144xf32>
%671 = "mhlo.broadcast_in_dim"(%419) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x1x1x144xf32>
%672 = mhlo.add %670, %671 : tensor<1x1x1x144xf32>
%673 = mhlo.add %672, %222 : tensor<1x1x1x144xf32>
@@ -913,10 +913,10 @@
%675 = mhlo.multiply %674, %239 : tensor<1x1x1x144xf32>
%676 = "mhlo.broadcast_in_dim"(%675) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x144xf32>) -> tensor<1x14x14x144xf32>
%677 = mhlo.multiply %662, %676 : tensor<1x14x14x144xf32>
- %678 = "mhlo.convolution"(%677, %418) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x144xf32>, tensor<1x1x144x48xf32>) -> tensor<1x14x14x48xf32>
+ %678 = "mhlo.convolution"(%677, %418) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x144xf32>, tensor<1x1x144x48xf32>) -> tensor<1x14x14x48xf32>
%679 = "mhlo.batch_norm_inference"(%678, %417, %416, %415, %414) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x48xf32>, tensor<48xf32>, tensor<48xf32>, tensor<48xf32>, tensor<48xf32>) -> tensor<1x14x14x48xf32>
%680 = mhlo.add %649, %679 : tensor<1x14x14x48xf32>
- %681 = "mhlo.convolution"(%680, %432) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x48xf32>, tensor<1x1x48x288xf32>) -> tensor<1x14x14x288xf32>
+ %681 = "mhlo.convolution"(%680, %432) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x48xf32>, tensor<1x1x48x288xf32>) -> tensor<1x14x14x288xf32>
%682 = "mhlo.batch_norm_inference"(%681, %431, %430, %429, %428) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x14x14x288xf32>, tensor<288xf32>, tensor<288xf32>, tensor<288xf32>, tensor<288xf32>) -> tensor<1x14x14x288xf32>
%683 = mhlo.add %682, %223 : tensor<1x14x14x288xf32>
%684 = "mhlo.clamp"(%266, %683, %264) : (tensor<f32>, tensor<1x14x14x288xf32>, tensor<f32>) -> tensor<1x14x14x288xf32>
@@ -924,7 +924,7 @@
%686 = mhlo.multiply %685, %682 : tensor<1x14x14x288xf32>
%687 = "mhlo.pad"(%686, %266) {edge_padding_high = dense<[0, 2, 2, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x14x14x288xf32>, tensor<f32>) -> tensor<1x17x17x288xf32>
%688 = "mhlo.reshape"(%427) : (tensor<5x5x288x1xf32>) -> tensor<5x5x1x288xf32>
- %689 = "mhlo.convolution"(%687, %688) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 288 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x17x17x288xf32>, tensor<5x5x1x288xf32>) -> tensor<1x7x7x288xf32>
+ %689 = "mhlo.convolution"(%687, %688) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 288 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x17x17x288xf32>, tensor<5x5x1x288xf32>) -> tensor<1x7x7x288xf32>
%690 = "mhlo.batch_norm_inference"(%689, %426, %425, %424, %423) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x288xf32>, tensor<288xf32>, tensor<288xf32>, tensor<288xf32>, tensor<288xf32>) -> tensor<1x7x7x288xf32>
%691 = mhlo.add %690, %224 : tensor<1x7x7x288xf32>
%692 = "mhlo.clamp"(%266, %691, %264) : (tensor<f32>, tensor<1x7x7x288xf32>, tensor<f32>) -> tensor<1x7x7x288xf32>
@@ -937,11 +937,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x288xf32>, tensor<f32>) -> tensor<1x288xf32>
%696 = mhlo.divide %695, %260 : tensor<1x288xf32>
%697 = "mhlo.reshape"(%696) : (tensor<1x288xf32>) -> tensor<1x1x1x288xf32>
- %698 = "mhlo.convolution"(%697, %441) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x288xf32>, tensor<1x1x288x72xf32>) -> tensor<1x1x1x72xf32>
+ %698 = "mhlo.convolution"(%697, %441) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x288xf32>, tensor<1x1x288x72xf32>) -> tensor<1x1x1x72xf32>
%699 = "mhlo.broadcast_in_dim"(%440) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<72xf32>) -> tensor<1x1x1x72xf32>
%700 = mhlo.add %698, %699 : tensor<1x1x1x72xf32>
%701 = mhlo.maximum %700, %261 : tensor<1x1x1x72xf32>
- %702 = "mhlo.convolution"(%701, %439) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x72xf32>, tensor<1x1x72x288xf32>) -> tensor<1x1x1x288xf32>
+ %702 = "mhlo.convolution"(%701, %439) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x72xf32>, tensor<1x1x72x288xf32>) -> tensor<1x1x1x288xf32>
%703 = "mhlo.broadcast_in_dim"(%438) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<288xf32>) -> tensor<1x1x1x288xf32>
%704 = mhlo.add %702, %703 : tensor<1x1x1x288xf32>
%705 = mhlo.add %704, %225 : tensor<1x1x1x288xf32>
@@ -949,16 +949,16 @@
%707 = mhlo.multiply %706, %242 : tensor<1x1x1x288xf32>
%708 = "mhlo.broadcast_in_dim"(%707) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x288xf32>) -> tensor<1x7x7x288xf32>
%709 = mhlo.multiply %694, %708 : tensor<1x7x7x288xf32>
- %710 = "mhlo.convolution"(%709, %437) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x288xf32>, tensor<1x1x288x96xf32>) -> tensor<1x7x7x96xf32>
+ %710 = "mhlo.convolution"(%709, %437) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x288xf32>, tensor<1x1x288x96xf32>) -> tensor<1x7x7x96xf32>
%711 = "mhlo.batch_norm_inference"(%710, %436, %435, %434, %433) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x7x7x96xf32>
- %712 = "mhlo.convolution"(%711, %451) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
+ %712 = "mhlo.convolution"(%711, %451) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
%713 = "mhlo.batch_norm_inference"(%712, %450, %449, %448, %447) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%714 = mhlo.add %713, %227 : tensor<1x7x7x576xf32>
%715 = "mhlo.clamp"(%266, %714, %264) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%716 = mhlo.multiply %715, %244 : tensor<1x7x7x576xf32>
%717 = mhlo.multiply %716, %713 : tensor<1x7x7x576xf32>
%718 = "mhlo.reshape"(%446) : (tensor<5x5x576x1xf32>) -> tensor<5x5x1x576xf32>
- %719 = "mhlo.convolution"(%717, %718) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 576 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<5x5x1x576xf32>) -> tensor<1x7x7x576xf32>
+ %719 = "mhlo.convolution"(%717, %718) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 576 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<5x5x1x576xf32>) -> tensor<1x7x7x576xf32>
%720 = "mhlo.batch_norm_inference"(%719, %445, %444, %443, %442) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%721 = mhlo.add %720, %227 : tensor<1x7x7x576xf32>
%722 = "mhlo.clamp"(%266, %721, %264) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
@@ -971,11 +971,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x576xf32>
%726 = mhlo.divide %725, %263 : tensor<1x576xf32>
%727 = "mhlo.reshape"(%726) : (tensor<1x576xf32>) -> tensor<1x1x1x576xf32>
- %728 = "mhlo.convolution"(%727, %460) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x144xf32>) -> tensor<1x1x1x144xf32>
+ %728 = "mhlo.convolution"(%727, %460) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x144xf32>) -> tensor<1x1x1x144xf32>
%729 = "mhlo.broadcast_in_dim"(%459) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x1x1x144xf32>
%730 = mhlo.add %728, %729 : tensor<1x1x1x144xf32>
%731 = mhlo.maximum %730, %262 : tensor<1x1x1x144xf32>
- %732 = "mhlo.convolution"(%731, %458) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x576xf32>) -> tensor<1x1x1x576xf32>
+ %732 = "mhlo.convolution"(%731, %458) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x576xf32>) -> tensor<1x1x1x576xf32>
%733 = "mhlo.broadcast_in_dim"(%457) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x1x1x576xf32>
%734 = mhlo.add %732, %733 : tensor<1x1x1x576xf32>
%735 = mhlo.add %734, %226 : tensor<1x1x1x576xf32>
@@ -983,17 +983,17 @@
%737 = mhlo.multiply %736, %243 : tensor<1x1x1x576xf32>
%738 = "mhlo.broadcast_in_dim"(%737) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x576xf32>) -> tensor<1x7x7x576xf32>
%739 = mhlo.multiply %724, %738 : tensor<1x7x7x576xf32>
- %740 = "mhlo.convolution"(%739, %456) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x7x7x96xf32>
+ %740 = "mhlo.convolution"(%739, %456) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x7x7x96xf32>
%741 = "mhlo.batch_norm_inference"(%740, %455, %454, %453, %452) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x7x7x96xf32>
%742 = mhlo.add %711, %741 : tensor<1x7x7x96xf32>
- %743 = "mhlo.convolution"(%742, %288) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
+ %743 = "mhlo.convolution"(%742, %288) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
%744 = "mhlo.batch_norm_inference"(%743, %287, %286, %285, %284) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%745 = mhlo.add %744, %227 : tensor<1x7x7x576xf32>
%746 = "mhlo.clamp"(%266, %745, %264) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
%747 = mhlo.multiply %746, %244 : tensor<1x7x7x576xf32>
%748 = mhlo.multiply %747, %744 : tensor<1x7x7x576xf32>
%749 = "mhlo.reshape"(%283) : (tensor<5x5x576x1xf32>) -> tensor<5x5x1x576xf32>
- %750 = "mhlo.convolution"(%748, %749) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 576 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<5x5x1x576xf32>) -> tensor<1x7x7x576xf32>
+ %750 = "mhlo.convolution"(%748, %749) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 576 : i64, padding = dense<2> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<5x5x1x576xf32>) -> tensor<1x7x7x576xf32>
%751 = "mhlo.batch_norm_inference"(%750, %282, %281, %280, %279) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%752 = mhlo.add %751, %227 : tensor<1x7x7x576xf32>
%753 = "mhlo.clamp"(%266, %752, %264) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
@@ -1006,11 +1006,11 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x576xf32>
%757 = mhlo.divide %756, %263 : tensor<1x576xf32>
%758 = "mhlo.reshape"(%757) : (tensor<1x576xf32>) -> tensor<1x1x1x576xf32>
- %759 = "mhlo.convolution"(%758, %297) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x144xf32>) -> tensor<1x1x1x144xf32>
+ %759 = "mhlo.convolution"(%758, %297) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x144xf32>) -> tensor<1x1x1x144xf32>
%760 = "mhlo.broadcast_in_dim"(%296) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<144xf32>) -> tensor<1x1x1x144xf32>
%761 = mhlo.add %759, %760 : tensor<1x1x1x144xf32>
%762 = mhlo.maximum %761, %262 : tensor<1x1x1x144xf32>
- %763 = "mhlo.convolution"(%762, %295) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x576xf32>) -> tensor<1x1x1x576xf32>
+ %763 = "mhlo.convolution"(%762, %295) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x144xf32>, tensor<1x1x144x576xf32>) -> tensor<1x1x1x576xf32>
%764 = "mhlo.broadcast_in_dim"(%294) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<576xf32>) -> tensor<1x1x1x576xf32>
%765 = mhlo.add %763, %764 : tensor<1x1x1x576xf32>
%766 = mhlo.add %765, %226 : tensor<1x1x1x576xf32>
@@ -1018,10 +1018,10 @@
%768 = mhlo.multiply %767, %243 : tensor<1x1x1x576xf32>
%769 = "mhlo.broadcast_in_dim"(%768) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} : (tensor<1x1x1x576xf32>) -> tensor<1x7x7x576xf32>
%770 = mhlo.multiply %755, %769 : tensor<1x7x7x576xf32>
- %771 = "mhlo.convolution"(%770, %293) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x7x7x96xf32>
+ %771 = "mhlo.convolution"(%770, %293) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<1x1x576x96xf32>) -> tensor<1x7x7x96xf32>
%772 = "mhlo.batch_norm_inference"(%771, %292, %291, %290, %289) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>, tensor<96xf32>) -> tensor<1x7x7x96xf32>
%773 = mhlo.add %742, %772 : tensor<1x7x7x96xf32>
- %774 = "mhlo.convolution"(%773, %271) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
+ %774 = "mhlo.convolution"(%773, %271) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x96xf32>, tensor<1x1x96x576xf32>) -> tensor<1x7x7x576xf32>
%775 = "mhlo.batch_norm_inference"(%774, %270, %269, %268, %267) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<1x7x7x576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>, tensor<576xf32>) -> tensor<1x7x7x576xf32>
%776 = mhlo.add %775, %227 : tensor<1x7x7x576xf32>
%777 = "mhlo.clamp"(%266, %776, %264) : (tensor<f32>, tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x7x7x576xf32>
@@ -1034,14 +1034,14 @@
}) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<1x7x7x576xf32>, tensor<f32>) -> tensor<1x576xf32>
%781 = mhlo.divide %780, %263 : tensor<1x576xf32>
%782 = "mhlo.reshape"(%781) : (tensor<1x576xf32>) -> tensor<1x1x1x576xf32>
- %783 = "mhlo.convolution"(%782, %273) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x1024xf32>) -> tensor<1x1x1x1024xf32>
+ %783 = "mhlo.convolution"(%782, %273) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x576xf32>, tensor<1x1x576x1024xf32>) -> tensor<1x1x1x1024xf32>
%784 = "mhlo.broadcast_in_dim"(%272) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x1x1x1024xf32>
%785 = mhlo.add %783, %784 : tensor<1x1x1x1024xf32>
%786 = mhlo.add %785, %228 : tensor<1x1x1x1024xf32>
%787 = "mhlo.clamp"(%266, %786, %264) : (tensor<f32>, tensor<1x1x1x1024xf32>, tensor<f32>) -> tensor<1x1x1x1024xf32>
%788 = mhlo.multiply %787, %245 : tensor<1x1x1x1024xf32>
%789 = mhlo.multiply %788, %785 : tensor<1x1x1x1024xf32>
- %790 = "mhlo.convolution"(%789, %476) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x1024xf32>, tensor<1x1x1024x1000xf32>) -> tensor<1x1x1x1000xf32>
+ %790 = "mhlo.convolution"(%789, %476) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x1x1024xf32>, tensor<1x1x1024x1000xf32>) -> tensor<1x1x1x1000xf32>
%791 = "mhlo.broadcast_in_dim"(%475) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1000xf32>) -> tensor<1x1x1x1000xf32>
%792 = mhlo.add %790, %791 : tensor<1x1x1x1000xf32>
%793 = "mhlo.reshape"(%792) : (tensor<1x1x1x1000xf32>) -> tensor<1x1000xf32>
diff --git a/iree/test/e2e/models/resnet50_fake_weights.mlir b/iree/test/e2e/models/resnet50_fake_weights.mlir
index 764d1fa..96192e3 100644
--- a/iree/test/e2e/models/resnet50_fake_weights.mlir
+++ b/iree/test/e2e/models/resnet50_fake_weights.mlir
@@ -980,7 +980,7 @@
%650 = util.global.load.indirect %319 : !util.ptr<tensor<1000xf32>> -> tensor<1000xf32>
%651 = util.global.load.indirect %318 : !util.ptr<tensor<2048x1000xf32>> -> tensor<2048x1000xf32>
%652 = "mhlo.pad"(%arg0, %331) {edge_padding_high = dense<[0, 3, 3, 0]> : tensor<4xi64>, edge_padding_low = dense<[0, 3, 3, 0]> : tensor<4xi64>, interior_padding = dense<0> : tensor<4xi64>} : (tensor<1x224x224x3xf32>, tensor<f32>) -> tensor<1x230x230x3xf32>
- %653 = "mhlo.convolution"(%652, %337) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x230x230x3xf32>, tensor<7x7x3x64xf32>) -> tensor<1x112x112x64xf32>
+ %653 = "mhlo.convolution"(%652, %337) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x230x230x3xf32>, tensor<7x7x3x64xf32>) -> tensor<1x112x112x64xf32>
%654 = "mhlo.broadcast_in_dim"(%336) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x112x112x64xf32>
%655 = mhlo.add %653, %654 : tensor<1x112x112x64xf32>
%656 = "mhlo.batch_norm_inference"(%655, %335, %334, %333, %332) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x112x112x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x112x112x64xf32>
@@ -991,273 +991,273 @@
%944 = mhlo.maximum %arg1, %arg2 : tensor<f32>
"mhlo.return"(%944) : (tensor<f32>) -> ()
}) {window_dimensions = dense<[1, 3, 3, 1]> : tensor<4xi64>, window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>} : (tensor<1x114x114x64xf32>, tensor<f32>) -> tensor<1x56x56x64xf32>
- %660 = "mhlo.convolution"(%659, %343) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
+ %660 = "mhlo.convolution"(%659, %343) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
%661 = "mhlo.broadcast_in_dim"(%342) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x56x56x256xf32>
%662 = mhlo.add %660, %661 : tensor<1x56x56x256xf32>
%663 = "mhlo.batch_norm_inference"(%662, %341, %340, %339, %338) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x56x56x256xf32>
- %664 = "mhlo.convolution"(%659, %349) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x64xf32>) -> tensor<1x56x56x64xf32>
+ %664 = "mhlo.convolution"(%659, %349) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x64xf32>) -> tensor<1x56x56x64xf32>
%665 = "mhlo.broadcast_in_dim"(%348) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%666 = mhlo.add %664, %665 : tensor<1x56x56x64xf32>
%667 = "mhlo.batch_norm_inference"(%666, %347, %346, %345, %344) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%668 = mhlo.maximum %667, %321 : tensor<1x56x56x64xf32>
- %669 = "mhlo.convolution"(%668, %355) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
+ %669 = "mhlo.convolution"(%668, %355) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
%670 = "mhlo.broadcast_in_dim"(%354) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%671 = mhlo.add %669, %670 : tensor<1x56x56x64xf32>
%672 = "mhlo.batch_norm_inference"(%671, %353, %352, %351, %350) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%673 = mhlo.maximum %672, %321 : tensor<1x56x56x64xf32>
- %674 = "mhlo.convolution"(%673, %361) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
+ %674 = "mhlo.convolution"(%673, %361) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
%675 = "mhlo.broadcast_in_dim"(%360) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x56x56x256xf32>
%676 = mhlo.add %674, %675 : tensor<1x56x56x256xf32>
%677 = "mhlo.batch_norm_inference"(%676, %359, %358, %357, %356) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x56x56x256xf32>
%678 = mhlo.add %663, %677 : tensor<1x56x56x256xf32>
%679 = mhlo.maximum %678, %322 : tensor<1x56x56x256xf32>
- %680 = "mhlo.convolution"(%679, %367) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x64xf32>) -> tensor<1x56x56x64xf32>
+ %680 = "mhlo.convolution"(%679, %367) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x64xf32>) -> tensor<1x56x56x64xf32>
%681 = "mhlo.broadcast_in_dim"(%366) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%682 = mhlo.add %680, %681 : tensor<1x56x56x64xf32>
%683 = "mhlo.batch_norm_inference"(%682, %365, %364, %363, %362) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%684 = mhlo.maximum %683, %321 : tensor<1x56x56x64xf32>
- %685 = "mhlo.convolution"(%684, %373) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
+ %685 = "mhlo.convolution"(%684, %373) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
%686 = "mhlo.broadcast_in_dim"(%372) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%687 = mhlo.add %685, %686 : tensor<1x56x56x64xf32>
%688 = "mhlo.batch_norm_inference"(%687, %371, %370, %369, %368) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%689 = mhlo.maximum %688, %321 : tensor<1x56x56x64xf32>
- %690 = "mhlo.convolution"(%689, %379) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
+ %690 = "mhlo.convolution"(%689, %379) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
%691 = "mhlo.broadcast_in_dim"(%378) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x56x56x256xf32>
%692 = mhlo.add %690, %691 : tensor<1x56x56x256xf32>
%693 = "mhlo.batch_norm_inference"(%692, %377, %376, %375, %374) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x56x56x256xf32>
%694 = mhlo.add %679, %693 : tensor<1x56x56x256xf32>
%695 = mhlo.maximum %694, %322 : tensor<1x56x56x256xf32>
- %696 = "mhlo.convolution"(%695, %385) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x64xf32>) -> tensor<1x56x56x64xf32>
+ %696 = "mhlo.convolution"(%695, %385) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x64xf32>) -> tensor<1x56x56x64xf32>
%697 = "mhlo.broadcast_in_dim"(%384) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%698 = mhlo.add %696, %697 : tensor<1x56x56x64xf32>
%699 = "mhlo.batch_norm_inference"(%698, %383, %382, %381, %380) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%700 = mhlo.maximum %699, %321 : tensor<1x56x56x64xf32>
- %701 = "mhlo.convolution"(%700, %391) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
+ %701 = "mhlo.convolution"(%700, %391) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<3x3x64x64xf32>) -> tensor<1x56x56x64xf32>
%702 = "mhlo.broadcast_in_dim"(%390) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x56x56x64xf32>
%703 = mhlo.add %701, %702 : tensor<1x56x56x64xf32>
%704 = "mhlo.batch_norm_inference"(%703, %389, %388, %387, %386) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<1x56x56x64xf32>
%705 = mhlo.maximum %704, %321 : tensor<1x56x56x64xf32>
- %706 = "mhlo.convolution"(%705, %397) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
+ %706 = "mhlo.convolution"(%705, %397) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x56x56x64xf32>, tensor<1x1x64x256xf32>) -> tensor<1x56x56x256xf32>
%707 = "mhlo.broadcast_in_dim"(%396) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x56x56x256xf32>
%708 = mhlo.add %706, %707 : tensor<1x56x56x256xf32>
%709 = "mhlo.batch_norm_inference"(%708, %395, %394, %393, %392) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x56x56x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x56x56x256xf32>
%710 = mhlo.add %695, %709 : tensor<1x56x56x256xf32>
%711 = mhlo.maximum %710, %322 : tensor<1x56x56x256xf32>
- %712 = "mhlo.convolution"(%711, %403) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x512xf32>) -> tensor<1x28x28x512xf32>
+ %712 = "mhlo.convolution"(%711, %403) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x512xf32>) -> tensor<1x28x28x512xf32>
%713 = "mhlo.broadcast_in_dim"(%402) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x28x28x512xf32>
%714 = mhlo.add %712, %713 : tensor<1x28x28x512xf32>
%715 = "mhlo.batch_norm_inference"(%714, %401, %400, %399, %398) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x28x28x512xf32>
- %716 = "mhlo.convolution"(%711, %409) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x128xf32>) -> tensor<1x28x28x128xf32>
+ %716 = "mhlo.convolution"(%711, %409) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x56x56x256xf32>, tensor<1x1x256x128xf32>) -> tensor<1x28x28x128xf32>
%717 = "mhlo.broadcast_in_dim"(%408) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%718 = mhlo.add %716, %717 : tensor<1x28x28x128xf32>
%719 = "mhlo.batch_norm_inference"(%718, %407, %406, %405, %404) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%720 = mhlo.maximum %719, %323 : tensor<1x28x28x128xf32>
- %721 = "mhlo.convolution"(%720, %415) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
+ %721 = "mhlo.convolution"(%720, %415) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
%722 = "mhlo.broadcast_in_dim"(%414) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%723 = mhlo.add %721, %722 : tensor<1x28x28x128xf32>
%724 = "mhlo.batch_norm_inference"(%723, %413, %412, %411, %410) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%725 = mhlo.maximum %724, %323 : tensor<1x28x28x128xf32>
- %726 = "mhlo.convolution"(%725, %421) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
+ %726 = "mhlo.convolution"(%725, %421) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
%727 = "mhlo.broadcast_in_dim"(%420) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x28x28x512xf32>
%728 = mhlo.add %726, %727 : tensor<1x28x28x512xf32>
%729 = "mhlo.batch_norm_inference"(%728, %419, %418, %417, %416) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x28x28x512xf32>
%730 = mhlo.add %715, %729 : tensor<1x28x28x512xf32>
%731 = mhlo.maximum %730, %324 : tensor<1x28x28x512xf32>
- %732 = "mhlo.convolution"(%731, %427) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
+ %732 = "mhlo.convolution"(%731, %427) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
%733 = "mhlo.broadcast_in_dim"(%426) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%734 = mhlo.add %732, %733 : tensor<1x28x28x128xf32>
%735 = "mhlo.batch_norm_inference"(%734, %425, %424, %423, %422) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%736 = mhlo.maximum %735, %323 : tensor<1x28x28x128xf32>
- %737 = "mhlo.convolution"(%736, %433) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
+ %737 = "mhlo.convolution"(%736, %433) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
%738 = "mhlo.broadcast_in_dim"(%432) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%739 = mhlo.add %737, %738 : tensor<1x28x28x128xf32>
%740 = "mhlo.batch_norm_inference"(%739, %431, %430, %429, %428) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%741 = mhlo.maximum %740, %323 : tensor<1x28x28x128xf32>
- %742 = "mhlo.convolution"(%741, %439) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
+ %742 = "mhlo.convolution"(%741, %439) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
%743 = "mhlo.broadcast_in_dim"(%438) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x28x28x512xf32>
%744 = mhlo.add %742, %743 : tensor<1x28x28x512xf32>
%745 = "mhlo.batch_norm_inference"(%744, %437, %436, %435, %434) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x28x28x512xf32>
%746 = mhlo.add %731, %745 : tensor<1x28x28x512xf32>
%747 = mhlo.maximum %746, %324 : tensor<1x28x28x512xf32>
- %748 = "mhlo.convolution"(%747, %445) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
+ %748 = "mhlo.convolution"(%747, %445) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
%749 = "mhlo.broadcast_in_dim"(%444) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%750 = mhlo.add %748, %749 : tensor<1x28x28x128xf32>
%751 = "mhlo.batch_norm_inference"(%750, %443, %442, %441, %440) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%752 = mhlo.maximum %751, %323 : tensor<1x28x28x128xf32>
- %753 = "mhlo.convolution"(%752, %451) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
+ %753 = "mhlo.convolution"(%752, %451) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
%754 = "mhlo.broadcast_in_dim"(%450) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%755 = mhlo.add %753, %754 : tensor<1x28x28x128xf32>
%756 = "mhlo.batch_norm_inference"(%755, %449, %448, %447, %446) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%757 = mhlo.maximum %756, %323 : tensor<1x28x28x128xf32>
- %758 = "mhlo.convolution"(%757, %457) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
+ %758 = "mhlo.convolution"(%757, %457) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
%759 = "mhlo.broadcast_in_dim"(%456) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x28x28x512xf32>
%760 = mhlo.add %758, %759 : tensor<1x28x28x512xf32>
%761 = "mhlo.batch_norm_inference"(%760, %455, %454, %453, %452) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x28x28x512xf32>
%762 = mhlo.add %747, %761 : tensor<1x28x28x512xf32>
%763 = mhlo.maximum %762, %324 : tensor<1x28x28x512xf32>
- %764 = "mhlo.convolution"(%763, %463) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
+ %764 = "mhlo.convolution"(%763, %463) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x128xf32>) -> tensor<1x28x28x128xf32>
%765 = "mhlo.broadcast_in_dim"(%462) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%766 = mhlo.add %764, %765 : tensor<1x28x28x128xf32>
%767 = "mhlo.batch_norm_inference"(%766, %461, %460, %459, %458) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%768 = mhlo.maximum %767, %323 : tensor<1x28x28x128xf32>
- %769 = "mhlo.convolution"(%768, %469) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
+ %769 = "mhlo.convolution"(%768, %469) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<3x3x128x128xf32>) -> tensor<1x28x28x128xf32>
%770 = "mhlo.broadcast_in_dim"(%468) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x28x28x128xf32>
%771 = mhlo.add %769, %770 : tensor<1x28x28x128xf32>
%772 = "mhlo.batch_norm_inference"(%771, %467, %466, %465, %464) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) -> tensor<1x28x28x128xf32>
%773 = mhlo.maximum %772, %323 : tensor<1x28x28x128xf32>
- %774 = "mhlo.convolution"(%773, %475) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
+ %774 = "mhlo.convolution"(%773, %475) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x28x28x128xf32>, tensor<1x1x128x512xf32>) -> tensor<1x28x28x512xf32>
%775 = "mhlo.broadcast_in_dim"(%474) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x28x28x512xf32>
%776 = mhlo.add %774, %775 : tensor<1x28x28x512xf32>
%777 = "mhlo.batch_norm_inference"(%776, %473, %472, %471, %470) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x28x28x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x28x28x512xf32>
%778 = mhlo.add %763, %777 : tensor<1x28x28x512xf32>
%779 = mhlo.maximum %778, %324 : tensor<1x28x28x512xf32>
- %780 = "mhlo.convolution"(%779, %481) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %780 = "mhlo.convolution"(%779, %481) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x1024xf32>) -> tensor<1x14x14x1024xf32>
%781 = "mhlo.broadcast_in_dim"(%480) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%782 = mhlo.add %780, %781 : tensor<1x14x14x1024xf32>
%783 = "mhlo.batch_norm_inference"(%782, %479, %478, %477, %476) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
- %784 = "mhlo.convolution"(%779, %487) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x256xf32>) -> tensor<1x14x14x256xf32>
+ %784 = "mhlo.convolution"(%779, %487) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x28x28x512xf32>, tensor<1x1x512x256xf32>) -> tensor<1x14x14x256xf32>
%785 = "mhlo.broadcast_in_dim"(%486) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%786 = mhlo.add %784, %785 : tensor<1x14x14x256xf32>
%787 = "mhlo.batch_norm_inference"(%786, %485, %484, %483, %482) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%788 = mhlo.maximum %787, %325 : tensor<1x14x14x256xf32>
- %789 = "mhlo.convolution"(%788, %493) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %789 = "mhlo.convolution"(%788, %493) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%790 = "mhlo.broadcast_in_dim"(%492) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%791 = mhlo.add %789, %790 : tensor<1x14x14x256xf32>
%792 = "mhlo.batch_norm_inference"(%791, %491, %490, %489, %488) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%793 = mhlo.maximum %792, %325 : tensor<1x14x14x256xf32>
- %794 = "mhlo.convolution"(%793, %499) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %794 = "mhlo.convolution"(%793, %499) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%795 = "mhlo.broadcast_in_dim"(%498) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%796 = mhlo.add %794, %795 : tensor<1x14x14x1024xf32>
%797 = "mhlo.batch_norm_inference"(%796, %497, %496, %495, %494) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%798 = mhlo.add %783, %797 : tensor<1x14x14x1024xf32>
%799 = mhlo.maximum %798, %326 : tensor<1x14x14x1024xf32>
- %800 = "mhlo.convolution"(%799, %505) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
+ %800 = "mhlo.convolution"(%799, %505) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
%801 = "mhlo.broadcast_in_dim"(%504) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%802 = mhlo.add %800, %801 : tensor<1x14x14x256xf32>
%803 = "mhlo.batch_norm_inference"(%802, %503, %502, %501, %500) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%804 = mhlo.maximum %803, %325 : tensor<1x14x14x256xf32>
- %805 = "mhlo.convolution"(%804, %511) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %805 = "mhlo.convolution"(%804, %511) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%806 = "mhlo.broadcast_in_dim"(%510) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%807 = mhlo.add %805, %806 : tensor<1x14x14x256xf32>
%808 = "mhlo.batch_norm_inference"(%807, %509, %508, %507, %506) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%809 = mhlo.maximum %808, %325 : tensor<1x14x14x256xf32>
- %810 = "mhlo.convolution"(%809, %517) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %810 = "mhlo.convolution"(%809, %517) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%811 = "mhlo.broadcast_in_dim"(%516) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%812 = mhlo.add %810, %811 : tensor<1x14x14x1024xf32>
%813 = "mhlo.batch_norm_inference"(%812, %515, %514, %513, %512) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%814 = mhlo.add %799, %813 : tensor<1x14x14x1024xf32>
%815 = mhlo.maximum %814, %326 : tensor<1x14x14x1024xf32>
- %816 = "mhlo.convolution"(%815, %523) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
+ %816 = "mhlo.convolution"(%815, %523) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
%817 = "mhlo.broadcast_in_dim"(%522) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%818 = mhlo.add %816, %817 : tensor<1x14x14x256xf32>
%819 = "mhlo.batch_norm_inference"(%818, %521, %520, %519, %518) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%820 = mhlo.maximum %819, %325 : tensor<1x14x14x256xf32>
- %821 = "mhlo.convolution"(%820, %529) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %821 = "mhlo.convolution"(%820, %529) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%822 = "mhlo.broadcast_in_dim"(%528) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%823 = mhlo.add %821, %822 : tensor<1x14x14x256xf32>
%824 = "mhlo.batch_norm_inference"(%823, %527, %526, %525, %524) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%825 = mhlo.maximum %824, %325 : tensor<1x14x14x256xf32>
- %826 = "mhlo.convolution"(%825, %535) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %826 = "mhlo.convolution"(%825, %535) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%827 = "mhlo.broadcast_in_dim"(%534) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%828 = mhlo.add %826, %827 : tensor<1x14x14x1024xf32>
%829 = "mhlo.batch_norm_inference"(%828, %533, %532, %531, %530) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%830 = mhlo.add %815, %829 : tensor<1x14x14x1024xf32>
%831 = mhlo.maximum %830, %326 : tensor<1x14x14x1024xf32>
- %832 = "mhlo.convolution"(%831, %541) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
+ %832 = "mhlo.convolution"(%831, %541) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
%833 = "mhlo.broadcast_in_dim"(%540) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%834 = mhlo.add %832, %833 : tensor<1x14x14x256xf32>
%835 = "mhlo.batch_norm_inference"(%834, %539, %538, %537, %536) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%836 = mhlo.maximum %835, %325 : tensor<1x14x14x256xf32>
- %837 = "mhlo.convolution"(%836, %547) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %837 = "mhlo.convolution"(%836, %547) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%838 = "mhlo.broadcast_in_dim"(%546) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%839 = mhlo.add %837, %838 : tensor<1x14x14x256xf32>
%840 = "mhlo.batch_norm_inference"(%839, %545, %544, %543, %542) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%841 = mhlo.maximum %840, %325 : tensor<1x14x14x256xf32>
- %842 = "mhlo.convolution"(%841, %553) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %842 = "mhlo.convolution"(%841, %553) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%843 = "mhlo.broadcast_in_dim"(%552) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%844 = mhlo.add %842, %843 : tensor<1x14x14x1024xf32>
%845 = "mhlo.batch_norm_inference"(%844, %551, %550, %549, %548) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%846 = mhlo.add %831, %845 : tensor<1x14x14x1024xf32>
%847 = mhlo.maximum %846, %326 : tensor<1x14x14x1024xf32>
- %848 = "mhlo.convolution"(%847, %559) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
+ %848 = "mhlo.convolution"(%847, %559) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
%849 = "mhlo.broadcast_in_dim"(%558) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%850 = mhlo.add %848, %849 : tensor<1x14x14x256xf32>
%851 = "mhlo.batch_norm_inference"(%850, %557, %556, %555, %554) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%852 = mhlo.maximum %851, %325 : tensor<1x14x14x256xf32>
- %853 = "mhlo.convolution"(%852, %565) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %853 = "mhlo.convolution"(%852, %565) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%854 = "mhlo.broadcast_in_dim"(%564) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%855 = mhlo.add %853, %854 : tensor<1x14x14x256xf32>
%856 = "mhlo.batch_norm_inference"(%855, %563, %562, %561, %560) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%857 = mhlo.maximum %856, %325 : tensor<1x14x14x256xf32>
- %858 = "mhlo.convolution"(%857, %571) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %858 = "mhlo.convolution"(%857, %571) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%859 = "mhlo.broadcast_in_dim"(%570) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%860 = mhlo.add %858, %859 : tensor<1x14x14x1024xf32>
%861 = "mhlo.batch_norm_inference"(%860, %569, %568, %567, %566) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%862 = mhlo.add %847, %861 : tensor<1x14x14x1024xf32>
%863 = mhlo.maximum %862, %326 : tensor<1x14x14x1024xf32>
- %864 = "mhlo.convolution"(%863, %577) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
+ %864 = "mhlo.convolution"(%863, %577) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x256xf32>) -> tensor<1x14x14x256xf32>
%865 = "mhlo.broadcast_in_dim"(%576) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%866 = mhlo.add %864, %865 : tensor<1x14x14x256xf32>
%867 = "mhlo.batch_norm_inference"(%866, %575, %574, %573, %572) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%868 = mhlo.maximum %867, %325 : tensor<1x14x14x256xf32>
- %869 = "mhlo.convolution"(%868, %583) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
+ %869 = "mhlo.convolution"(%868, %583) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<3x3x256x256xf32>) -> tensor<1x14x14x256xf32>
%870 = "mhlo.broadcast_in_dim"(%582) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<1x14x14x256xf32>
%871 = mhlo.add %869, %870 : tensor<1x14x14x256xf32>
%872 = "mhlo.batch_norm_inference"(%871, %581, %580, %579, %578) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>) -> tensor<1x14x14x256xf32>
%873 = mhlo.maximum %872, %325 : tensor<1x14x14x256xf32>
- %874 = "mhlo.convolution"(%873, %589) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
+ %874 = "mhlo.convolution"(%873, %589) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x14x14x256xf32>, tensor<1x1x256x1024xf32>) -> tensor<1x14x14x1024xf32>
%875 = "mhlo.broadcast_in_dim"(%588) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%876 = mhlo.add %874, %875 : tensor<1x14x14x1024xf32>
%877 = "mhlo.batch_norm_inference"(%876, %587, %586, %585, %584) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x14x14x1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>, tensor<1024xf32>) -> tensor<1x14x14x1024xf32>
%878 = mhlo.add %863, %877 : tensor<1x14x14x1024xf32>
%879 = mhlo.maximum %878, %326 : tensor<1x14x14x1024xf32>
- %880 = "mhlo.convolution"(%879, %595) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x2048xf32>) -> tensor<1x7x7x2048xf32>
+ %880 = "mhlo.convolution"(%879, %595) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x2048xf32>) -> tensor<1x7x7x2048xf32>
%881 = "mhlo.broadcast_in_dim"(%594) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%882 = mhlo.add %880, %881 : tensor<1x7x7x2048xf32>
%883 = "mhlo.batch_norm_inference"(%882, %593, %592, %591, %590) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
- %884 = "mhlo.convolution"(%879, %601) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x512xf32>) -> tensor<1x7x7x512xf32>
+ %884 = "mhlo.convolution"(%879, %601) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<2> : tensor<2xi64>} : (tensor<1x14x14x1024xf32>, tensor<1x1x1024x512xf32>) -> tensor<1x7x7x512xf32>
%885 = "mhlo.broadcast_in_dim"(%600) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%886 = mhlo.add %884, %885 : tensor<1x7x7x512xf32>
%887 = "mhlo.batch_norm_inference"(%886, %599, %598, %597, %596) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%888 = mhlo.maximum %887, %327 : tensor<1x7x7x512xf32>
- %889 = "mhlo.convolution"(%888, %607) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
+ %889 = "mhlo.convolution"(%888, %607) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
%890 = "mhlo.broadcast_in_dim"(%606) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%891 = mhlo.add %889, %890 : tensor<1x7x7x512xf32>
%892 = "mhlo.batch_norm_inference"(%891, %605, %604, %603, %602) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%893 = mhlo.maximum %892, %327 : tensor<1x7x7x512xf32>
- %894 = "mhlo.convolution"(%893, %613) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
+ %894 = "mhlo.convolution"(%893, %613) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
%895 = "mhlo.broadcast_in_dim"(%612) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%896 = mhlo.add %894, %895 : tensor<1x7x7x2048xf32>
%897 = "mhlo.batch_norm_inference"(%896, %611, %610, %609, %608) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%898 = mhlo.add %883, %897 : tensor<1x7x7x2048xf32>
%899 = mhlo.maximum %898, %328 : tensor<1x7x7x2048xf32>
- %900 = "mhlo.convolution"(%899, %619) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x2048xf32>, tensor<1x1x2048x512xf32>) -> tensor<1x7x7x512xf32>
+ %900 = "mhlo.convolution"(%899, %619) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x2048xf32>, tensor<1x1x2048x512xf32>) -> tensor<1x7x7x512xf32>
%901 = "mhlo.broadcast_in_dim"(%618) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%902 = mhlo.add %900, %901 : tensor<1x7x7x512xf32>
%903 = "mhlo.batch_norm_inference"(%902, %617, %616, %615, %614) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%904 = mhlo.maximum %903, %327 : tensor<1x7x7x512xf32>
- %905 = "mhlo.convolution"(%904, %625) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
+ %905 = "mhlo.convolution"(%904, %625) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
%906 = "mhlo.broadcast_in_dim"(%624) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%907 = mhlo.add %905, %906 : tensor<1x7x7x512xf32>
%908 = "mhlo.batch_norm_inference"(%907, %623, %622, %621, %620) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%909 = mhlo.maximum %908, %327 : tensor<1x7x7x512xf32>
- %910 = "mhlo.convolution"(%909, %631) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
+ %910 = "mhlo.convolution"(%909, %631) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
%911 = "mhlo.broadcast_in_dim"(%630) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%912 = mhlo.add %910, %911 : tensor<1x7x7x2048xf32>
%913 = "mhlo.batch_norm_inference"(%912, %629, %628, %627, %626) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%914 = mhlo.add %899, %913 : tensor<1x7x7x2048xf32>
%915 = mhlo.maximum %914, %328 : tensor<1x7x7x2048xf32>
- %916 = "mhlo.convolution"(%915, %637) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x2048xf32>, tensor<1x1x2048x512xf32>) -> tensor<1x7x7x512xf32>
+ %916 = "mhlo.convolution"(%915, %637) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x2048xf32>, tensor<1x1x2048x512xf32>) -> tensor<1x7x7x512xf32>
%917 = "mhlo.broadcast_in_dim"(%636) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%918 = mhlo.add %916, %917 : tensor<1x7x7x512xf32>
%919 = "mhlo.batch_norm_inference"(%918, %635, %634, %633, %632) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%920 = mhlo.maximum %919, %327 : tensor<1x7x7x512xf32>
- %921 = "mhlo.convolution"(%920, %643) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
+ %921 = "mhlo.convolution"(%920, %643) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<3x3x512x512xf32>) -> tensor<1x7x7x512xf32>
%922 = "mhlo.broadcast_in_dim"(%642) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<512xf32>) -> tensor<1x7x7x512xf32>
%923 = mhlo.add %921, %922 : tensor<1x7x7x512xf32>
%924 = "mhlo.batch_norm_inference"(%923, %641, %640, %639, %638) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>) -> tensor<1x7x7x512xf32>
%925 = mhlo.maximum %924, %327 : tensor<1x7x7x512xf32>
- %926 = "mhlo.convolution"(%925, %649) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
+ %926 = "mhlo.convolution"(%925, %649) {batch_group_count = 1 : i64, dimension_numbers = #mhlo.conv<raw input_batch_dimension = 0, input_feature_dimension = 3, input_spatial_dimensions = [1, 2], kernel_input_feature_dimension = 2, kernel_output_feature_dimension = 3, kernel_spatial_dimensions = [0, 1], output_batch_dimension = 0, output_feature_dimension = 3, output_spatial_dimensions = [1, 2]>, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x7x7x512xf32>, tensor<1x1x512x2048xf32>) -> tensor<1x7x7x2048xf32>
%927 = "mhlo.broadcast_in_dim"(%648) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
%928 = mhlo.add %926, %927 : tensor<1x7x7x2048xf32>
%929 = "mhlo.batch_norm_inference"(%928, %647, %646, %645, %644) {epsilon = 1.001000e-05 : f32, feature_index = 3 : i64} : (tensor<1x7x7x2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>, tensor<2048xf32>) -> tensor<1x7x7x2048xf32>
diff --git a/iree/test/e2e/vulkan_specific/conv.mlir b/iree/test/e2e/vulkan_specific/conv.mlir
index 13f562f..8ae0422 100644
--- a/iree/test/e2e/vulkan_specific/conv.mlir
+++ b/iree/test/e2e/vulkan_specific/conv.mlir
@@ -49,16 +49,17 @@
: tensor<2x3x2x3xf32>
%2 = "mhlo.convolution"(%0, %1) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>}
diff --git a/iree/test/e2e/vulkan_specific/vectorized_conv.mlir b/iree/test/e2e/vulkan_specific/vectorized_conv.mlir
index ab6a290..b0dfabf 100644
--- a/iree/test/e2e/vulkan_specific/vectorized_conv.mlir
+++ b/iree/test/e2e/vulkan_specific/vectorized_conv.mlir
@@ -45,7 +45,18 @@
1.0, 1.0, 2.5, 3.0, 2.0, 1.0, 1.0, 0.5, 0.0, 4.5, 0.0, 1.0, 4.0, 1.5, 5.0, 0.0]]]]>
: tensor<2x2x4x32xf32>
- %0 = "mhlo.convolution"(%input, %filter) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x3x3x4xf32>, tensor<2x2x4x32xf32>) -> tensor<1x2x2x32xf32>
+ %0 = "mhlo.convolution"(%input, %filter) {batch_group_count = 1 : i64,
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >, feature_group_count = 1 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x3x3x4xf32>, tensor<2x2x4x32xf32>) -> tensor<1x2x2x32xf32>
check.expect_almost_eq_const(%0, dense<
[[[[113.25, 127.0, 198.0, 173.25, 159.5, 190.75, 135.5, 160.0,
@@ -79,7 +90,18 @@
[[[[2.0, 2.0, 4.0, 2.0, 1.5, 5.0, 3.5, 2.5, 2.5, 0.0, 0.5, 2.5, 4.5, 1.5, 0.0, 2.5]]]]>
: tensor<1x1x1x16xf32>
- %0 = "mhlo.convolution"(%input, %filter) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 16 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x4x16xf32>, tensor<1x1x1x16xf32>) -> tensor<1x1x4x16xf32>
+ %0 = "mhlo.convolution"(%input, %filter) {batch_group_count = 1 : i64,
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >, feature_group_count = 16 : i64, padding = dense<0> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x1x4x16xf32>, tensor<1x1x1x16xf32>) -> tensor<1x1x4x16xf32>
check.expect_almost_eq_const(%0, dense<
[[[[12.0, 15.0, 0.0, 3.0, 2.25, 17.5, 15.75, 5.0, 7.5, 0.0, 0.25, 7.5, 15.75, 10.5, 0.0, 16.25],
diff --git a/iree/test/e2e/xla_ops/convolution.mlir b/iree/test/e2e/xla_ops/convolution.mlir
index 8472efd..78f26f6 100644
--- a/iree/test/e2e/xla_ops/convolution.mlir
+++ b/iree/test/e2e/xla_ops/convolution.mlir
@@ -10,16 +10,17 @@
[[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
@@ -47,16 +48,17 @@
[[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 3 : i64,
- input_feature_dimension = 0 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 3,
+ input_feature_dimension = 0,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} : (tensor<2x4x4x1xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
@@ -79,16 +81,17 @@
[[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[2, 1]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [2, 1],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
@@ -111,16 +114,17 @@
[[ 9.0, 11.0], [10.0, 12.0]]]]> : tensor<1x3x2x2xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 0 : i64,
- kernel_spatial_dimensions = dense<[1, 3]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 0,
+ kernel_spatial_dimensions = [1, 3],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<1x3x2x2xf32>) -> tensor<1x2x3x1xf32>
@@ -143,16 +147,17 @@
[[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 2 : i64,
- output_feature_dimension = 0 : i64,
- output_spatial_dimensions = dense<[3, 1]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 2,
+ output_feature_dimension = 0,
+ output_spatial_dimensions = [3, 1]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x3x1x2xf32>
@@ -176,16 +181,17 @@
[[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
rhs_dilation = dense<1> : tensor<2xi64>,
@@ -214,16 +220,17 @@
[[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
rhs_dilation = dense<1> : tensor<2xi64>,
@@ -304,16 +311,17 @@
[103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
rhs_dilation = dense<1> : tensor<2xi64>,
window_strides = dense<1> : tensor<2xi64>} :
@@ -373,17 +381,17 @@
[-0.7792497, 0.31265917, -0.7236341 ]]]]> : tensor<2x2x2x3xf32>
%res = "mhlo.convolution"(%inputs, %weights) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>
- },
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 1 : i64,
padding = dense<0> : tensor<2x2xi64>,
rhs_dilation = dense<[2, 1]> : tensor<2xi64>,
@@ -406,17 +414,17 @@
%arg1 = util.unfoldable_constant dense<1.0> : tensor<2x2x2x3xf32>
%res = "mhlo.convolution"(%arg0, %arg1) {
batch_group_count = 1 : i64,
- dimension_numbers = {
- input_batch_dimension = 0 : i64,
- input_feature_dimension = 3 : i64,
- input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
- kernel_input_feature_dimension = 2 : i64,
- kernel_output_feature_dimension = 3 : i64,
- kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
- output_batch_dimension = 0 : i64,
- output_feature_dimension = 3 : i64,
- output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>
- },
+ dimension_numbers = #mhlo.conv<raw
+ input_batch_dimension = 0,
+ input_feature_dimension = 3,
+ input_spatial_dimensions = [1, 2],
+ kernel_input_feature_dimension = 2,
+ kernel_output_feature_dimension = 3,
+ kernel_spatial_dimensions = [0, 1],
+ output_batch_dimension = 0,
+ output_feature_dimension = 3,
+ output_spatial_dimensions = [1, 2]
+ >,
feature_group_count = 2 : i64,
padding = dense<0> : tensor<2x2xi64>,
rhs_dilation = dense<1> : tensor<2xi64>,
diff --git a/llvm-external-projects/iree-dialects/BUILD b/llvm-external-projects/iree-dialects/BUILD
index ff9cbdf..90340be 100644
--- a/llvm-external-projects/iree-dialects/BUILD
+++ b/llvm-external-projects/iree-dialects/BUILD
@@ -275,6 +275,7 @@
cc_library(
name = "IREEPyDMTransforms",
srcs = glob([
+ "lib/Dialect/IREEPyDM/Transforms/*.cpp",
"lib/Dialect/IREEPyDM/Transforms/RTL/*.cpp",
"lib/Dialect/IREEPyDM/Transforms/ToIREE/*.cpp",
]),
@@ -291,6 +292,7 @@
"@llvm-project//mlir:IR",
"@llvm-project//mlir:MathDialect",
"@llvm-project//mlir:Parser",
+ "@llvm-project//mlir:Pass",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TransformUtils",
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 5f7a535..471b25e 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 5f7a5353301b776ffb0e5fb048992898507bf7ee
+Subproject commit 471b25e217e635e058bbdbca8c693e2998380a60