Rename `xla_hlo` dialect to `mhlo`. This is part of the current refactoring of the HLO-related dialect. `xla_hlo` will be reintroduced in a new form later. PiperOrigin-RevId: 319916753

diff --git a/bindings/javatests/com/google/iree/simple_mul.mlir b/bindings/javatests/com/google/iree/simple_mul.mlir
index 2839639..7966f82 100644
--- a/bindings/javatests/com/google/iree/simple_mul.mlir
+++ b/bindings/javatests/com/google/iree/simple_mul.mlir

@@ -1,5 +1,5 @@
 func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
     attributes { iree.module.export } {
-  %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   return %0 : tensor<4xf32>
 }

diff --git a/bindings/python/pyiree/compiler/compiler_test.py b/bindings/python/pyiree/compiler/compiler_test.py
index 3f8a483..d3fd1f2 100644
--- a/bindings/python/pyiree/compiler/compiler_test.py
+++ b/bindings/python/pyiree/compiler/compiler_test.py

@@ -19,7 +19,7 @@
 SIMPLE_MUL_ASM = """
 func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
       attributes { iree.module.export } {
-    %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+    %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
     return %0 : tensor<4xf32>
 }
 """

diff --git a/bindings/python/pyiree/rt/system_api_test.py b/bindings/python/pyiree/rt/system_api_test.py
index f1702be..9d670ce 100644
--- a/bindings/python/pyiree/rt/system_api_test.py
+++ b/bindings/python/pyiree/rt/system_api_test.py

@@ -29,7 +29,7 @@
   module @arithmetic {
     func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
           attributes { iree.module.export } {
-        %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+        %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
         return %0 : tensor<4xf32>
     }
   }

diff --git a/bindings/python/pyiree/rt/vm_test.py b/bindings/python/pyiree/rt/vm_test.py
index 6b633ce..5a3c1ec 100644
--- a/bindings/python/pyiree/rt/vm_test.py
+++ b/bindings/python/pyiree/rt/vm_test.py

@@ -39,7 +39,7 @@
   input_module = ctx.parse_asm("""
     func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
           attributes { iree.module.export } {
-        %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+        %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
         return %0 : tensor<4xf32>
     }
     """)
@@ -55,7 +55,7 @@
   input_module = ctx.parse_asm("""
     func @simple_mul(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32>
           attributes { iree.module.export } {
-        %0 = "xla_hlo.abs"(%arg0) : (tensor<?x?xf32>) -> tensor<?x?xf32>
+        %0 = "mhlo.abs"(%arg0) : (tensor<?x?xf32>) -> tensor<?x?xf32>
         return %0 : tensor<?x?xf32>
     }
     """)

diff --git a/build_tools/third_party/tensorflow/tensorflow/compiler/mlir/hlo/CMakeLists.txt b/build_tools/third_party/tensorflow/tensorflow/compiler/mlir/hlo/CMakeLists.txt
index 0402e85..7bcaf34 100644
--- a/build_tools/third_party/tensorflow/tensorflow/compiler/mlir/hlo/CMakeLists.txt
+++ b/build_tools/third_party/tensorflow/tensorflow/compiler/mlir/hlo/CMakeLists.txt

@@ -44,7 +44,7 @@
     "lib/Dialect/mhlo/transforms/lower_general_dot.cc"
     "lib/Dialect/mhlo/transforms/materialize_broadcasts.cc"
     "lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc"
-    "lib/Dialect/mhlo/transforms/xla_hlo_fusion.cc"
+    "lib/Dialect/mhlo/transforms/mhlo_fusion.cc"
     "lib/Dialect/mhlo/transforms/xla_legalize_to_linalg.cc"
     "lib/utils/broadcast_utils.cc"
     "lib/utils/convert_op_folder.cc"

diff --git a/colab/edge_detection.ipynb b/colab/edge_detection.ipynb
index 97af8d2..e34c39a 100644
--- a/colab/edge_detection.ipynb
+++ b/colab/edge_detection.ipynb

@@ -26,9 +26,9 @@
         "\n",
         "1.  Define a `tf.Module` containing a `@tf.function` that performs edge detection\n",
         "2.  Save the `tf.Module` as a `SavedModel`\n",
-        "3.  Use IREE's python bindings to load the `SavedModel` into MLIR in the `xla_hlo` dialect\n",
+        "3.  Use IREE's python bindings to load the `SavedModel` into MLIR in the `mhlo` dialect\n",
         "4.  Save the MLIR to a file (can stop here to use it from another application)\n",
-        "5.  Compile the `xla_hlo` MLIR into a VM module for IREE to execute\n",
+        "5.  Compile the `mhlo` MLIR into a VM module for IREE to execute\n",
         "6.  Run the VM module through IREE's runtime to test the edge detection function"
       ]
     },
@@ -91,11 +91,11 @@
         "save_options = tf.saved_model.SaveOptions(save_debug_info=True)\n",
         "tf.saved_model.save(tf_module, saved_model_path, options=save_options)\n",
         "\n",
-        "# Compile from SavedModel to MLIR xla_hlo, then save to a file.\n",
+        "# Compile from SavedModel to MLIR mhlo, then save to a file.\n",
         "# \n",
         "# Do *not* further compile to a bytecode module for a particular backend.\n",
         "# \n",
-        "# By stopping at xla_hlo in text format, we can more easily take advantage of\n",
+        "# By stopping at mhlo in text format, we can more easily take advantage of\n",
         "# future compiler improvements within IREE and can use iree_bytecode_module to\n",
         "# compile and bundle the module into a sample application. For a production\n",
         "# application, we would probably want to freeze the version of IREE used and\n",
@@ -119,14 +119,14 @@
             "\n",
             "module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 175 : i32}} {\n",
             "  func @edge_detect_sobel_operator(%arg0: tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32> attributes {iree.module.export, iree.reflection = {abi = \"sip\", abiv = 1 : i32, sip = \"I8!S5!k0_0R3!_0\"}, tf._input_shapes = [\"tfshape$dim { size: 1 } dim { size: 128 } dim { size: 128 } dim { size: 1 }\"]} {\n",
-            "    %0 = xla_hlo.constant dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]> : tensor<3x3x1x1xf32>\n",
-            "    %1 = xla_hlo.constant dense<[[[[1.000000e+00]], [[2.000000e+00]], [[1.000000e+00]]], [[[0.000000e+00]], [[0.000000e+00]], [[0.000000e+00]]], [[[-1.000000e+00]], [[-2.000000e+00]], [[-1.000000e+00]]]]> : tensor<3x3x1x1xf32>\n",
-            "    %2 = \"xla_hlo.convolution\"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>\n",
-            "    %3 = xla_hlo.multiply %2, %2 : tensor<1x128x128x1xf32>\n",
-            "    %4 = \"xla_hlo.convolution\"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>\n",
-            "    %5 = xla_hlo.multiply %4, %4 : tensor<1x128x128x1xf32>\n",
-            "    %6 = xla_hlo.add %3, %5 : tensor<1x128x128x1xf32>\n",
-            "    %7 = \"xla_hlo.sqrt\"(%6) : (tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32>\n",
+            "    %0 = mhlo.constant dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]> : tensor<3x3x1x1xf32>\n",
+            "    %1 = mhlo.constant dense<[[[[1.000000e+00]], [[2.000000e+00]], [[1.000000e+00]]], [[[0.000000e+00]], [[0.000000e+00]], [[0.000000e+00]]], [[[-1.000000e+00]], [[-2.000000e+00]], [[-1.000000e+00]]]]> : tensor<3x3x1x1xf32>\n",
+            "    %2 = \"mhlo.convolution\"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>\n",
+            "    %3 = mhlo.multiply %2, %2 : tensor<1x128x128x1xf32>\n",
+            "    %4 = \"mhlo.convolution\"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>\n",
+            "    %5 = mhlo.multiply %4, %4 : tensor<1x128x128x1xf32>\n",
+            "    %6 = mhlo.add %3, %5 : tensor<1x128x128x1xf32>\n",
+            "    %7 = \"mhlo.sqrt\"(%6) : (tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32>\n",
             "    return %7 : tensor<1x128x128x1xf32>\n",
             "  }\n",
             "}\n",

diff --git a/colab/low_level_invoke_function.ipynb b/colab/low_level_invoke_function.ipynb
index 94a6089..31b5118 100644
--- a/colab/low_level_invoke_function.ipynb
+++ b/colab/low_level_invoke_function.ipynb

@@ -54,7 +54,7 @@
         "  module @arithmetic {\n",
         "    func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>\n",
         "          attributes { iree.module.export } {\n",
-        "        %0 = \"xla_hlo.multiply\"(%arg0, %arg1) {name = \"mul.1\"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>\n",
+        "        %0 = \"mhlo.multiply\"(%arg0, %arg1) {name = \"mul.1\"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>\n",
         "        return %0 : tensor<4xf32>\n",
         "    } \n",
         "  }\n",

diff --git a/colab/mnist_tensorflow.ipynb b/colab/mnist_tensorflow.ipynb
index 734e338..12c8a4e 100644
--- a/colab/mnist_tensorflow.ipynb
+++ b/colab/mnist_tensorflow.ipynb

@@ -507,7 +507,7 @@
         "outputId": "24cded90-c436-47ce-b4f4-7a5da46ea38a"
       },
       "source": [
-        "#@title Load the SavedModel into IREE's compiler as MLIR xla_hlo\n",
+        "#@title Load the SavedModel into IREE's compiler as MLIR mhlo\n",
         "\n",
         "compiler_module = ireec.tf_load_saved_model(\n",
         "    saved_model_dir, exported_names=[\"predict\"])\n",
@@ -537,36 +537,36 @@
             "    %1 = flow.variable.address @\"__iree_flow___sm_node15__model.layer-1.bias\" : !iree.ptr<tensor<128xf32>>\n",
             "    %2 = flow.variable.address @\"__iree_flow___sm_node20__model.layer-2.kernel\" : !iree.ptr<tensor<128x10xf32>>\n",
             "    %3 = flow.variable.address @\"__iree_flow___sm_node21__model.layer-2.bias\" : !iree.ptr<tensor<10xf32>>\n",
-            "    %4 = xla_hlo.constant dense<0xFF800000> : tensor<f32>\n",
-            "    %5 = xla_hlo.constant dense<0.000000e+00> : tensor<f32>\n",
+            "    %4 = mhlo.constant dense<0xFF800000> : tensor<f32>\n",
+            "    %5 = mhlo.constant dense<0.000000e+00> : tensor<f32>\n",
             "    %6 = flow.variable.load.indirect %3 : !iree.ptr<tensor<10xf32>> -> tensor<10xf32>\n",
             "    %7 = flow.variable.load.indirect %2 : !iree.ptr<tensor<128x10xf32>> -> tensor<128x10xf32>\n",
             "    %8 = flow.variable.load.indirect %1 : !iree.ptr<tensor<128xf32>> -> tensor<128xf32>\n",
             "    %9 = flow.variable.load.indirect %0 : !iree.ptr<tensor<784x128xf32>> -> tensor<784x128xf32>\n",
-            "    %10 = \"xla_hlo.reshape\"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>\n",
-            "    %11 = \"xla_hlo.dot\"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>\n",
-            "    %12 = \"xla_hlo.broadcast_in_dim\"(%8) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x128xf32>\n",
-            "    %13 = xla_hlo.add %11, %12 : tensor<1x128xf32>\n",
-            "    %14 = \"xla_hlo.broadcast_in_dim\"(%5) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x128xf32>\n",
-            "    %15 = xla_hlo.maximum %14, %13 : tensor<1x128xf32>\n",
-            "    %16 = \"xla_hlo.dot\"(%15, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>\n",
-            "    %17 = \"xla_hlo.broadcast_in_dim\"(%6) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<10xf32>) -> tensor<1x10xf32>\n",
-            "    %18 = xla_hlo.add %16, %17 : tensor<1x10xf32>\n",
-            "    %19 = \"xla_hlo.reduce\"(%18, %4) ( {\n",
+            "    %10 = \"mhlo.reshape\"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>\n",
+            "    %11 = \"mhlo.dot\"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>\n",
+            "    %12 = \"mhlo.broadcast_in_dim\"(%8) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<128xf32>) -> tensor<1x128xf32>\n",
+            "    %13 = mhlo.add %11, %12 : tensor<1x128xf32>\n",
+            "    %14 = \"mhlo.broadcast_in_dim\"(%5) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x128xf32>\n",
+            "    %15 = mhlo.maximum %14, %13 : tensor<1x128xf32>\n",
+            "    %16 = \"mhlo.dot\"(%15, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>\n",
+            "    %17 = \"mhlo.broadcast_in_dim\"(%6) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<10xf32>) -> tensor<1x10xf32>\n",
+            "    %18 = mhlo.add %16, %17 : tensor<1x10xf32>\n",
+            "    %19 = \"mhlo.reduce\"(%18, %4) ( {\n",
             "    ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors\n",
-            "      %26 = xla_hlo.maximum %arg1, %arg2 : tensor<f32>\n",
-            "      \"xla_hlo.return\"(%26) : (tensor<f32>) -> ()\n",
+            "      %26 = mhlo.maximum %arg1, %arg2 : tensor<f32>\n",
+            "      \"mhlo.return\"(%26) : (tensor<f32>) -> ()\n",
             "    }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>\n",
-            "    %20 = \"xla_hlo.broadcast_in_dim\"(%19) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>\n",
-            "    %21 = xla_hlo.subtract %18, %20 : tensor<1x10xf32>\n",
-            "    %22 = \"xla_hlo.exponential\"(%21) : (tensor<1x10xf32>) -> tensor<1x10xf32>\n",
-            "    %23 = \"xla_hlo.reduce\"(%22, %5) ( {\n",
+            "    %20 = \"mhlo.broadcast_in_dim\"(%19) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>\n",
+            "    %21 = mhlo.subtract %18, %20 : tensor<1x10xf32>\n",
+            "    %22 = \"mhlo.exponential\"(%21) : (tensor<1x10xf32>) -> tensor<1x10xf32>\n",
+            "    %23 = \"mhlo.reduce\"(%22, %5) ( {\n",
             "    ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors\n",
-            "      %26 = xla_hlo.add %arg1, %arg2 : tensor<f32>\n",
-            "      \"xla_hlo.return\"(%26) : (tensor<f32>) -> ()\n",
+            "      %26 = mhlo.add %arg1, %arg2 : tensor<f32>\n",
+            "      \"mhlo.return\"(%26) : (tensor<f32>) -> ()\n",
             "    }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>\n",
-            "    %24 = \"xla_hlo.broadcast_in_dim\"(%23) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>\n",
-            "    %25 = xla_hlo.divide %22, %24 : tensor<1x10xf32>\n",
+            "    %24 = \"mhlo.broadcast_in_dim\"(%23) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>\n",
+            "    %25 = mhlo.divide %22, %24 : tensor<1x10xf32>\n",
             "    return %25 : tensor<1x10xf32>\n",
             "  }\n",
             "}\r\n",
@@ -588,7 +588,7 @@
         "outputId": "b8958b7f-c7bb-4fbd-b800-e58c46134086"
       },
       "source": [
-        "#@title Compile the xla_hlo MLIR and prepare a context to execute it\n",
+        "#@title Compile the mhlo MLIR and prepare a context to execute it\n",
         "\n",
         "# Compile the MLIR module into a VM module for execution\n",
         "flatbuffer_blob = compiler_module.compile(target_backends=[backend_name])\n",

diff --git a/colab/simple_tensorflow_module_import.ipynb b/colab/simple_tensorflow_module_import.ipynb
index 3ba73a0..600afc1 100644
--- a/colab/simple_tensorflow_module_import.ipynb
+++ b/colab/simple_tensorflow_module_import.ipynb

@@ -141,9 +141,9 @@
             "  \"tf_saved_model.global_tensor\"() {is_mutable, sym_name = \"__sm_node1__v\", tf_saved_model.exported_names = [\"v\"], type = tensor<1xf32>, value = dense<4.000000e+00> : tensor<1xf32>} : () -> ()\n",
             "  func @__inference_add_10820(%arg0: tensor<4xf32> {tf_saved_model.index_path = [0]}, %arg1: tensor<4xf32> {tf_saved_model.index_path = [1]}, %arg2: tensor<*x!tf.resource> {tf_saved_model.bound_input = @__sm_node1__v}) -> (tensor<4xf32> {tf_saved_model.index_path = []}) attributes {tf._input_shapes = [\"tfshape$dim { size: 4 }\", \"tfshape$dim { size: 4 }\", \"tfshape$unknown_rank: true\"], tf.signature.is_stateful, tf_saved_model.exported_names = [\"add\"]} {\n",
             "    %0 = \"tf.ReadVariableOp\"(%arg2) {_output_shapes = [\"tfshape$dim { size: 1 }\"], device = \"\", dtype = f32} : (tensor<*x!tf.resource>) -> tensor<1xf32>\n",
-            "    %1 = \"xla_hlo.multiply\"(%0, %arg0) : (tensor<1xf32>, tensor<4xf32>) -> tensor<4xf32>\n",
-            "    %2 = xla_hlo.add %1, %arg1 : tensor<4xf32>\n",
-            "    %3 = \"xla_hlo.tanh\"(%2) : (tensor<4xf32>) -> tensor<4xf32>\n",
+            "    %1 = \"mhlo.multiply\"(%0, %arg0) : (tensor<1xf32>, tensor<4xf32>) -> tensor<4xf32>\n",
+            "    %2 = mhlo.add %1, %arg1 : tensor<4xf32>\n",
+            "    %3 = \"mhlo.tanh\"(%2) : (tensor<4xf32>) -> tensor<4xf32>\n",
             "    return %3 : tensor<4xf32>\n",
             "  }\n",
             "}\n",

diff --git a/docs/developer_overview.md b/docs/developer_overview.md
index f23cd89..56fb5f8 100644
--- a/docs/developer_overview.md
+++ b/docs/developer_overview.md

@@ -56,7 +56,7 @@
 IREE's compiler components accept programs and code fragments in several
 formats, including high level TensorFlow Python code, serialized TensorFlow
 [SavedModel](https://www.tensorflow.org/guide/saved_model) programs, and lower
-level textual MLIR files using combinations of supported dialects like `xla_hlo`
+level textual MLIR files using combinations of supported dialects like `mhlo`
 and IREE's internal dialects. While input programs are ultimately compiled down
 to modules suitable for running on some combination of IREE's target deployment
 platforms, IREE's developer tools can run individual compiler passes,

diff --git a/docs/mnist_example.md b/docs/mnist_example.md
index 8fad54e..93cd23a 100644
--- a/docs/mnist_example.md
+++ b/docs/mnist_example.md

@@ -29,40 +29,40 @@
   func @main(%arg0: tensor<1x28x28x1xf32>) -> tuple<tensor<1x10xf32>>
   attributes {iree.module.export} {
     %cst = constant  {name = "constant.9"} dense<0.5> : tensor<f32>
-    %0 = "xla_hlo.broadcast_in_dim"(%cst) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
-    %1 = "xla_hlo.copy"(%arg0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
-    %2 = "xla_hlo.reshape"(%1) {name = "reshape.2"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
-    %3 = "xla_hlo.reshape"(%2) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
+    %0 = "mhlo.broadcast_in_dim"(%cst) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
+    %1 = "mhlo.copy"(%arg0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
+    %2 = "mhlo.reshape"(%1) {name = "reshape.2"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
+    %3 = "mhlo.reshape"(%2) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
     %cst_0 = constant  {name = "constant.4"} dense<0.5> : tensor<784x128xf32>
-    %4 = "xla_hlo.dot"(%3, %cst_0) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
+    %4 = "mhlo.dot"(%3, %cst_0) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
     %cst_1 = constant  {name = "constant.6"} dense<0.5> : tensor<128xf32>
-    %5 = "xla_hlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
-    %6 = "xla_hlo.add"(%4, %5) {name = "add.8"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
-    %7 = "xla_hlo.maximum"(%0, %6) {name = "maximum.11"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %5 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
+    %6 = "mhlo.add"(%4, %5) {name = "add.8"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %7 = "mhlo.maximum"(%0, %6) {name = "maximum.11"} : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %cst_2 = constant  {name = "constant.12"} dense<0.5> : tensor<128x10xf32>
-    %8 = "xla_hlo.dot"(%7, %cst_2) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
+    %8 = "mhlo.dot"(%7, %cst_2) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
     %cst_3 = constant  {name = "constant.14"} dense<0.5> : tensor<10xf32>
-    %9 = "xla_hlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
-    %10 = "xla_hlo.add"(%8, %9) {name = "add.16"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %9 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
+    %10 = "mhlo.add"(%8, %9) {name = "add.16"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %cst_4 = constant  {name = "constant.17"} dense<0xFF800000> : tensor<f32>
-    %11 = "xla_hlo.reduce"(%10, %cst_4) ( {
+    %11 = "mhlo.reduce"(%10, %cst_4) ( {
     ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):   // no predecessors
-      %20 = "xla_hlo.maximum"(%arg1, %arg2) {name = "maximum.21"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
-      "xla_hlo.return"(%20) : (tensor<f32>) -> ()
+      %20 = "mhlo.maximum"(%arg1, %arg2) {name = "maximum.21"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
+      "mhlo.return"(%20) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %12 = "xla_hlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
-    %13 = "xla_hlo.subtract"(%10, %12) {name = "subtract.24"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
-    %14 = "xla_hlo.exponential"(%13) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %12 = "mhlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %13 = "mhlo.subtract"(%10, %12) {name = "subtract.24"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %14 = "mhlo.exponential"(%13) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
     %cst_5 = constant  {name = "constant.27"} dense<0.5> : tensor<f32>
-    %15 = "xla_hlo.reduce"(%14, %cst_5) ( {
+    %15 = "mhlo.reduce"(%14, %cst_5) ( {
     ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):   // no predecessors
-      %21 = "xla_hlo.add"(%arg3, %arg4) {name = "add.31"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
-      "xla_hlo.return"(%21) : (tensor<f32>) -> ()
+      %21 = "mhlo.add"(%arg3, %arg4) {name = "add.31"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
+      "mhlo.return"(%21) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %16 = "xla_hlo.broadcast_in_dim"(%15) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
-    %17 = "xla_hlo.divide"(%14, %16) {name = "divide.35"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
-    %18 = "xla_hlo.reshape"(%17) {name = "reshape.36"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %19 = "xla_hlo.tuple"(%18) {name = "tuple.37"} : (tensor<1x10xf32>) -> tuple<tensor<1x10xf32>>
+    %16 = "mhlo.broadcast_in_dim"(%15) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %17 = "mhlo.divide"(%14, %16) {name = "divide.35"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %18 = "mhlo.reshape"(%17) {name = "reshape.36"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %19 = "mhlo.tuple"(%18) {name = "tuple.37"} : (tensor<1x10xf32>) -> tuple<tensor<1x10xf32>>
     return %19 : tuple<tensor<1x10xf32>>
   }
 }
@@ -81,8 +81,8 @@
         func @main_entry_dispatch_0(%arg0: memref<1x28x28x1xf32>, %arg1: memref<1x784xf32>)
   attributes  {iree.executable.export, iree.executable.workload = dense<[784, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x28x28x1xf32>) : tensor<1x28x28x1xf32>
-          %1 = "xla_hlo.copy"(%0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
-          %2 = "xla_hlo.reshape"(%1) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
+          %1 = "mhlo.copy"(%0) {name = "copy.1"} : (tensor<1x28x28x1xf32>) -> tensor<1x28x28x1xf32>
+          %2 = "mhlo.reshape"(%1) {name = "reshape.3"} : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
           iree.store_output(%2 : tensor<1x784xf32>, %arg1 : memref<1x784xf32>)
           iree.return
         }
@@ -96,7 +96,7 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<[128, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x784xf32>) : tensor<1x784xf32>
           %1 = iree.load_input(%arg1 : memref<784x128xf32>) : tensor<784x128xf32>
-          %2 = "xla_hlo.dot"(%0, %1) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
+          %2 = "mhlo.dot"(%0, %1) {name = "dot.5", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
           iree.store_output(%2 : tensor<1x128xf32>, %arg2 : memref<1x128xf32>)
           iree.return
         }
@@ -111,10 +111,10 @@
           %0 = iree.load_input(%arg0 : memref<1x128xf32>) : tensor<1x128xf32>
           %cst = constant dense<5.000000e-01> : tensor<128xf32>
           %cst_0 = constant dense<5.000000e-01> : tensor<f32>
-          %1 = "xla_hlo.broadcast_in_dim"(%cst_0) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
-          %2 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
+          %1 = "mhlo.broadcast_in_dim"(%cst_0) {name = "broadcast.10"} : (tensor<f32>) -> tensor<1x128xf32>
+          %2 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.7"} : (tensor<128xf32>) -> tensor<1x128xf32>
           %3 = addf %0, %2 : tensor<1x128xf32>
-          %4 = xla_hlo.maximum %1, %3 {name = "maximum.11"} : tensor<1x128xf32>
+          %4 = mhlo.maximum %1, %3 {name = "maximum.11"} : tensor<1x128xf32>
           iree.store_output(%4 : tensor<1x128xf32>, %arg1 : memref<1x128xf32>)
           iree.return
         }
@@ -128,7 +128,7 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x128xf32>) : tensor<1x128xf32>
           %1 = iree.load_input(%arg1 : memref<128x10xf32>) : tensor<128x10xf32>
-          %2 = "xla_hlo.dot"(%0, %1) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
+          %2 = "mhlo.dot"(%0, %1) {name = "dot.13", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
           iree.store_output(%2 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
           iree.return
         }
@@ -142,7 +142,7 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
           %cst = constant dense<5.000000e-01> : tensor<10xf32>
-          %1 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
+          %1 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.15"} : (tensor<10xf32>) -> tensor<1x10xf32>
           %2 = addf %0, %1 : tensor<1x10xf32>
           iree.store_output(%2 : tensor<1x10xf32>, %arg1 : memref<1x10xf32>)
           iree.return
@@ -157,10 +157,10 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<1> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
           %cst = constant dense<0xFF800000> : tensor<f32>
-          %1 = "xla_hlo.reduce"(%0, %cst) ( {
+          %1 = "mhlo.reduce"(%0, %cst) ( {
           ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>): // no predecessors
-            %2 = xla_hlo.maximum %arg2, %arg3 {name = "maximum.21"} : tensor<f32>
-            "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+            %2 = mhlo.maximum %arg2, %arg3 {name = "maximum.21"} : tensor<f32>
+            "mhlo.return"(%2) : (tensor<f32>) -> ()
           }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
           iree.store_output(%1 : tensor<1xf32>, %arg1 : memref<1xf32>)
           iree.return
@@ -175,9 +175,9 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
           %1 = iree.load_input(%arg1 : memref<1xf32>) : tensor<1xf32>
-          %2 = "xla_hlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
+          %2 = "mhlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.23"} : (tensor<1xf32>) -> tensor<1x10xf32>
           %3 = subf %0, %2 : tensor<1x10xf32>
-          %4 = "xla_hlo.exponential"(%3) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
+          %4 = "mhlo.exponential"(%3) {name = "exponential.25"} : (tensor<1x10xf32>) -> tensor<1x10xf32>
           iree.store_output(%4 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
           iree.return
         }
@@ -191,10 +191,10 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<1> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1x10xf32>) : tensor<1x10xf32>
           %cst = constant dense<5.000000e-01> : tensor<f32>
-          %1 = "xla_hlo.reduce"(%0, %cst) ( {
+          %1 = "mhlo.reduce"(%0, %cst) ( {
           ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>): // no predecessors
             %2 = addf %arg2, %arg3 : tensor<f32>
-            "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+            "mhlo.return"(%2) : (tensor<f32>) -> ()
           }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
           iree.store_output(%1 : tensor<1xf32>, %arg1 : memref<1xf32>)
           iree.return
@@ -209,7 +209,7 @@
   attributes  {iree.executable.export, iree.executable.workload = dense<[10, 1, 1]> : tensor<3xi32>, iree.ordinal = 0 : i32} {
           %0 = iree.load_input(%arg0 : memref<1xf32>) : tensor<1xf32>
           %1 = iree.load_input(%arg1 : memref<1x10xf32>) : tensor<1x10xf32>
-          %2 = "xla_hlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
+          %2 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<0> : tensor<1xi64>, name = "broadcast.34"} : (tensor<1xf32>) -> tensor<1x10xf32>
           %3 = divf %1, %2 : tensor<1x10xf32>
           iree.store_output(%3 : tensor<1x10xf32>, %arg2 : memref<1x10xf32>)
           iree.return

diff --git a/docs/roadmap_design.md b/docs/roadmap_design.md
index 3063585..ad4c613 100644
--- a/docs/roadmap_design.md
+++ b/docs/roadmap_design.md

@@ -77,27 +77,27 @@
 updates) in order to perform a large majority of the `flow` transformations.
 Recent work in the [Linalg](https://mlir.llvm.org/docs/Dialects/Linalg/) dialect
 is adding support for operating on value-semantic tensors, meaning that we can
-first apply `xla_hlo` to `linalg` lowerings and any of the transformations
+first apply `mhlo` to `linalg` lowerings and any of the transformations
 available in Linalg prior to performing our own `flow` lowerings. The advantage
 is that Linalg will have much stronger and principled code motion and nested
 loop transformation optimizations than is possible on higher-level ops. As not
 all operations can be represented as `linalg` ops IREE will be able to ingest a
-mix of `linalg`, `std`, and `xla_hlo` (or its replacement) ops.
+mix of `linalg`, `std`, and `mhlo` (or its replacement) ops.
 
 ### XLA HLO: Canonicalizations
 
 <a id="markdown-XLA%20HLO%3A%20Canonicalizations" name="XLA%20HLO%3A%20Canonicalizations"></a>
 
-Very little effort has been applied to `xla_hlo` optimizations and there are a
+Very little effort has been applied to `mhlo` optimizations and there are a
 significant number of missing folders, canonicalizers, and simple
 transformations. Many of these happen in legacy XLA C++ backends; however we
 need them in MLIR so that we can make use of dynamic shapes, mixed dialect
 inputs, etc. The `tf2xla` bridge work (converting Tensorflow models into the
-corresponding `xla_hlo` ops) is nearing its initial milestones and afterward we
+corresponding `mhlo` ops) is nearing its initial milestones and afterward we
 expect more of these missing pieces to be filled in.
 
 Examples of the optimizations that will greatly benefit IREE (and any other
-backend consuming `xla_hlo`) include:
+backend consuming `mhlo`) include:
 
 -   Eliding unneeded transpose, reshape, and broadcast operations.
 -   Inserting transpose, reshape, and broadcast operations to allow for more
@@ -113,20 +113,20 @@
 HLO only operates on tensor values - even for simple scalars - and this presents
 a problem when attempting to determine which code should be specified to run on
 accelerators vs. what should run on the host. The canonical example is
-`xla_hlo.while`, which as seen in the example below uses scalar tensors for its
+`mhlo.while`, which as seen in the example below uses scalar tensors for its
 loop iteration counter and comparison.
 
 ```mlir
 %start = constant dense<1> : tensor<i32>
 %bound = constant dense<3> : tensor<i32>
-%res = "xla_hlo.while"(%start) ( {
+%res = "mhlo.while"(%start) ( {
 ^bb0(%count: tensor<i32>):
-  %1 = "xla_hlo.compare"(%count, %bound) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
-  "xla_hlo.return"(%1) : (tensor<i1>) -> ()
+  %1 = "mhlo.compare"(%count, %bound) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+  "mhlo.return"(%1) : (tensor<i1>) -> ()
 },  {
 ^bb0(%count: tensor<i32>):
-  %1 = xla_hlo.add %count, %count : tensor<i32>
-  "xla_hlo.return"(%1) : (tensor<i32>) -> ()
+  %1 = mhlo.add %count, %count : tensor<i32>
+  "mhlo.return"(%1) : (tensor<i32>) -> ()
 }) : (tensor<i32>) -> tensor<i32>
 ```
 
@@ -168,8 +168,8 @@
 
 Not all source frontends have this issue (misrepresenting simple host
 computation as non-dense tensor operations), and our goal is to add a
-transformation that heuristically converts `xla_hlo` ops acting on small tensors
-to `std` ops acting on primitive values (`i32`, `index`, etc).
+transformation that heuristically converts `mhlo` ops acting on small tensors to
+`std` ops acting on primitive values (`i32`, `index`, etc).
 
 ### Quantization
 
@@ -348,24 +348,24 @@
 additional dispatch, materialization of an intermediate tensor, and a barrier:
 
 ```mlir
-%bcast = "xla_hlo.broadcast_in_dim"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>
-%mul1 = xla_hlo.multiply %arg0, %bcast : tensor<1024x10xf32>
+%bcast = "mhlo.broadcast_in_dim"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>
+%mul1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>
 // (pretend something here that prevents fusion)
-%mul2 = xla_hlo.multiply %arg1, %bcast : tensor<1024x10xf32>
+%mul2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>
 ```
 
 ```mlir
 %bcast = flow.dispatch.region(%cst : tensor<f32>) -> tensor<1024x10xf32> {
-  %0 = "xla_hlo.broadcast_in_dim"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>
+  %0 = "mhlo.broadcast_in_dim"(%cst) : (tensor<f32>) -> tensor<1024x10xf32>
   return %0 : tensor<1024x10xf32>
 }
 // a barrier will be required here
 %mul1 = flow.dispatch.region(%arg0 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {
-  %1 = xla_hlo.multiply %arg0, %bcast : tensor<1024x10xf32>
+  %1 = mhlo.multiply %arg0, %bcast : tensor<1024x10xf32>
   return %1 : tensor<1024x10xf32>
 }
 %mul2 = flow.dispatch.region(%arg1 : tensor<1024x10xf32>, %bcast : tensor<1024x10xf32>) -> tensor<1024x10xf32> {
-  %2 = xla_hlo.multiply %arg1, %bcast : tensor<1024x10xf32>
+  %2 = mhlo.multiply %arg1, %bcast : tensor<1024x10xf32>
   return %2 : tensor<1024x10xf32>
 }
 ```

diff --git a/docs/simple_ir_walkthrough.md b/docs/simple_ir_walkthrough.md
index a820c99..1dfcb49 100644
--- a/docs/simple_ir_walkthrough.md
+++ b/docs/simple_ir_walkthrough.md

@@ -92,7 +92,7 @@
 ```mlir
 func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
     attributes { iree.module.export } {
-  %0 = xla_hlo.multiply(%arg0, %arg1) : tensor<4xf32>
+  %0 = mhlo.multiply(%arg0, %arg1) : tensor<4xf32>
   return %0 : tensor<4xf32>
 }
 ```

diff --git a/docs/testing_guide.md b/docs/testing_guide.md
index ce1ea4c..3be690d 100644
--- a/docs/testing_guide.md
+++ b/docs/testing_guide.md

@@ -217,28 +217,28 @@
 ```mlir
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[0.0, 1.1, 2.5, 4.9]> : tensor<4xf32>
-  %result = "xla_hlo.floor"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.floor"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>): tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<101.3> : tensor<f32>
-  %result = "xla_hlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<101.0> : tensor<f32>): tensor<f32>
   return
 }
 
 func @double() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<11.2> : tensor<f64>
-  %result = "xla_hlo.floor"(%input) : (tensor<f64>) -> tensor<f64>
+  %result = "mhlo.floor"(%input) : (tensor<f64>) -> tensor<f64>
   check.expect_almost_eq_const(%result, dense<11.0> : tensor<f64>): tensor<f64>
   return
 }
 
 func @negative() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<-1.1> : tensor<f32>
-  %result = "xla_hlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>): tensor<f32>
   return
 }

diff --git a/integrations/tensorflow/bindings/python/pyiree/tf/compiler/saved_model_test.py b/integrations/tensorflow/bindings/python/pyiree/tf/compiler/saved_model_test.py
index 7fd7cee..92f9c43 100644
--- a/integrations/tensorflow/bindings/python/pyiree/tf/compiler/saved_model_test.py
+++ b/integrations/tensorflow/bindings/python/pyiree/tf/compiler/saved_model_test.py

@@ -69,7 +69,7 @@
       input_module = compiler.tf_load_saved_model(sm_dir)
       xla_asm = input_module.to_asm()
       print("XLA ASM:", xla_asm)
-      self.assertRegex(xla_asm, "xla_hlo.tanh")
+      self.assertRegex(xla_asm, "mhlo.tanh")
 
 
 if __name__ == "__main__":

diff --git a/integrations/tensorflow/bindings/python/pyiree/xla/compiler/xla_module_proto_test.py b/integrations/tensorflow/bindings/python/pyiree/xla/compiler/xla_module_proto_test.py
index 741909f..eac1288 100644
--- a/integrations/tensorflow/bindings/python/pyiree/xla/compiler/xla_module_proto_test.py
+++ b/integrations/tensorflow/bindings/python/pyiree/xla/compiler/xla_module_proto_test.py

@@ -48,7 +48,7 @@
     # Validate imported ASM.
     xla_asm = module.to_asm()
     print("XLA ASM: ", xla_asm)
-    self.assertRegex(xla_asm, "xla_hlo.add")
+    self.assertRegex(xla_asm, "mhlo.add")
 
 
 if __name__ == "__main__":

diff --git a/iree/compiler/Conversion/HLOToLinalg/DecomposeHLOClamp.cpp b/iree/compiler/Conversion/HLOToLinalg/DecomposeHLOClamp.cpp
index 19606fd..835e84b 100644
--- a/iree/compiler/Conversion/HLOToLinalg/DecomposeHLOClamp.cpp
+++ b/iree/compiler/Conversion/HLOToLinalg/DecomposeHLOClamp.cpp

@@ -20,12 +20,12 @@
 namespace iree_compiler {
 
 namespace {
-/// A pass to decompose xla_hlo.clamp ops into xla_hlo.compare and
-/// xla_hlo.select ops.
-class DecomposeClampOp : public OpRewritePattern<xla_hlo::ClampOp> {
+/// A rewrite pattern that decomposes mhlo.clamp ops into mhlo.compare and
+/// mhlo.select ops.
+class DecomposeClampOp : public OpRewritePattern<mhlo::ClampOp> {
  public:
-  using OpRewritePattern<xla_hlo::ClampOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::ClampOp op,
+  using OpRewritePattern<mhlo::ClampOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(mhlo::ClampOp op,
                                 PatternRewriter &rewriter) const override {
     auto minType = op.min().getType().dyn_cast<RankedTensorType>();
     auto operandType = op.operand().getType().dyn_cast<RankedTensorType>();
@@ -38,14 +38,14 @@
 
     // clamp(a, x, b) = min(max(a, x), b)
     Location loc = op.getLoc();
-    Value cmpMin = rewriter.create<xla_hlo::CompareOp>(
+    Value cmpMin = rewriter.create<mhlo::CompareOp>(
         loc, op.min(), op.operand(), rewriter.getStringAttr("LT"));
-    Value selectMin = rewriter.create<xla_hlo::SelectOp>(
-        loc, operandType, cmpMin, op.operand(), op.min());
-    Value cmpMax = rewriter.create<xla_hlo::CompareOp>(
+    Value selectMin = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMin,
+                                                      op.operand(), op.min());
+    Value cmpMax = rewriter.create<mhlo::CompareOp>(
         loc, selectMin, op.max(), rewriter.getStringAttr("LT"));
-    Value selectMax = rewriter.create<xla_hlo::SelectOp>(
-        loc, operandType, cmpMax, selectMin, op.max());
+    Value selectMax = rewriter.create<mhlo::SelectOp>(loc, operandType, cmpMax,
+                                                      selectMin, op.max());
     rewriter.replaceOp(op, selectMax);
     return success();
   }

diff --git a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
index 7bb8fd4..1ae605e 100644
--- a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp
+++ b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnBuffers.cpp

@@ -15,7 +15,7 @@
 //===- HLOToLinalgOnBuffers.cpp - Pass to convert HLO to Linalg on buffers-===//
 //
 // Pass to convert from HLO to linalg on buffers. Currently only handles cases
-// where the dispatch region contains a single xla_hlo op that can be converted
+// where the dispatch region contains a single mhlo op that can be converted
 // to linalg on buffers.
 //
 //===----------------------------------------------------------------------===//
@@ -227,7 +227,7 @@
 }  // namespace
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.dot conversion patterns.
+// mhlo.dot conversion patterns.
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -239,7 +239,7 @@
 };
 }
 
-static DotOperationType getDotOperationType(xla_hlo::DotOp dotOp) {
+static DotOperationType getDotOperationType(mhlo::DotOp dotOp) {
   ArrayRef<int64_t> lhsShape =
       dotOp.lhs().getType().cast<ShapedType>().getShape();
   ArrayRef<int64_t> rhsShape =
@@ -261,14 +261,14 @@
 }
 
 namespace {
-/// Converts xla_hlo.dot operation to linalg.matmul op
+/// Converts mhlo.dot operation to linalg.matmul op
 template <DotOperationType opType, typename LinalgOpTy>
 struct DotOpConversion
     : public ConvertToLinalgBufferOp<DotOpConversion<opType, LinalgOpTy>,
-                                     xla_hlo::DotOp> {
+                                     mhlo::DotOp> {
   using ConvertToLinalgBufferOp<DotOpConversion<opType, LinalgOpTy>,
-                                xla_hlo::DotOp>::ConvertToLinalgBufferOp;
-  LogicalResult apply(xla_hlo::DotOp op, ArrayRef<Value> inputBuffers,
+                                mhlo::DotOp>::ConvertToLinalgBufferOp;
+  LogicalResult apply(mhlo::DotOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const {
     if (getDotOperationType(op) == opType) {
@@ -287,23 +287,23 @@
 }  // namespace
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.convolution conversion patterns and utility functions.
+// mhlo.convolution conversion patterns and utility functions.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Converts xla_hlo.convolution operation to linalg.conv op.
+/// Converts mhlo.convolution operation to linalg.conv op.
 struct ConvOpConversion
-    : public ConvertToLinalgBufferOp<ConvOpConversion, xla_hlo::ConvOp> {
+    : public ConvertToLinalgBufferOp<ConvOpConversion, mhlo::ConvOp> {
   using ConvertToLinalgBufferOp<ConvOpConversion,
-                                xla_hlo::ConvOp>::ConvertToLinalgBufferOp;
-  LogicalResult apply(xla_hlo::ConvOp op, ArrayRef<Value> inputBuffers,
+                                mhlo::ConvOp>::ConvertToLinalgBufferOp;
+  LogicalResult apply(mhlo::ConvOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
 }  // namespace
 
 LogicalResult ConvOpConversion::apply(
-    xla_hlo::ConvOp op, ArrayRef<Value> inputBuffers,
+    mhlo::ConvOp op, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
   if (const auto dimensionNumbers = op.dimension_numbers()) {
     const int inputSpatialRank =
@@ -388,27 +388,27 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.concatenate conversion patterns and utility functions.
+// mhlo.concatenate conversion patterns and utility functions.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Converts a xla_hlo.concatenate op to an indexed_generic op. The
+/// Converts an mhlo.concatenate op to an indexed_generic op. The
 /// implementation adds more dimensions to make the loops correct, because
 /// each dimension in indexing maps matches to exactly one range.
 class ConcatenateOpConversion
     : public ConvertToLinalgBufferOp<ConcatenateOpConversion,
-                                     xla_hlo::ConcatenateOp> {
+                                     mhlo::ConcatenateOp> {
  public:
-  using ConvertToLinalgBufferOp<
-      ConcatenateOpConversion, xla_hlo::ConcatenateOp>::ConvertToLinalgBufferOp;
-  LogicalResult apply(xla_hlo::ConcatenateOp op, ArrayRef<Value> inputBuffers,
+  using ConvertToLinalgBufferOp<ConcatenateOpConversion,
+                                mhlo::ConcatenateOp>::ConvertToLinalgBufferOp;
+  LogicalResult apply(mhlo::ConcatenateOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
 }  // namespace
 
 LogicalResult ConcatenateOpConversion::apply(
-    xla_hlo::ConcatenateOp op, ArrayRef<Value> inputBuffers,
+    mhlo::ConcatenateOp op, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
   Location loc = op.getLoc();
   int dim = op.dimension().getSExtValue();
@@ -489,27 +489,27 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.pad conversion patterns and utility functions.
+// mhlo.pad conversion patterns and utility functions.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Converts xla_hlo.pad operation to linalg.indexed_generic op.
+/// Converts mhlo.pad operation to linalg.indexed_generic op.
 // TODO(#1604): Lower the pad op to a Linalg named op.
 struct PadOpConversion
-    : public ConvertToLinalgBufferOp<PadOpConversion, xla_hlo::PadOp> {
+    : public ConvertToLinalgBufferOp<PadOpConversion, mhlo::PadOp> {
   using ConvertToLinalgBufferOp<PadOpConversion,
-                                xla_hlo::PadOp>::ConvertToLinalgBufferOp;
+                                mhlo::PadOp>::ConvertToLinalgBufferOp;
 
-  LogicalResult apply(xla_hlo::PadOp op, ArrayRef<Value> inputBuffers,
+  LogicalResult apply(mhlo::PadOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
 }  // namespace
 
 /// Returns an AffineMapAttr that is the indexing map to use for the input of a
-/// xla_hlo.pad `op`.
+/// mhlo.pad `op`.
 static AffineMapAttr getPadOpInputIndexingMap(
-    xla_hlo::PadOp op, int rank, ConversionPatternRewriter &rewriter) {
+    mhlo::PadOp op, int rank, ConversionPatternRewriter &rewriter) {
   const auto edgePaddingLow = convertDenseIntAttr(op.edge_padding_low());
   SmallVector<AffineExpr, 4> exprs;
   for (int i = 0; i < rank; ++i)
@@ -519,14 +519,14 @@
 }
 
 LogicalResult PadOpConversion::apply(
-    xla_hlo::PadOp op, ArrayRef<Value> inputBuffers,
-    ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
+    mhlo::PadOp op, ArrayRef<Value> inputBuffers, ArrayRef<Value> resultBuffers,
+    ConversionPatternRewriter &rewriter) const {
   if (llvm::any_of(op.interior_padding().getValues<IntegerAttr>(),
                    [](auto attr) { return attr.getInt() != 0; }))
     return op.emitError(
         "pad op with non-zero interiror_padding is not supported");
 
-  xla_hlo::PadOp::Adaptor adaptor(inputBuffers);
+  mhlo::PadOp::Adaptor adaptor(inputBuffers);
   auto loc = op.getLoc();
 
   Attribute paddingConstVal = getInitValueAsConst(adaptor.padding_value());
@@ -615,24 +615,24 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.slice conversion patterns.
+// mhlo.slice conversion patterns.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Converts xla_hlo.slice operation to linalg.subview + linalg.copy
+/// Converts mhlo.slice operation to linalg.subview + linalg.copy
 struct SliceOpConversion
-    : public ConvertToLinalgBufferOp<SliceOpConversion, xla_hlo::SliceOp> {
+    : public ConvertToLinalgBufferOp<SliceOpConversion, mhlo::SliceOp> {
   using ConvertToLinalgBufferOp<SliceOpConversion,
-                                xla_hlo::SliceOp>::ConvertToLinalgBufferOp;
+                                mhlo::SliceOp>::ConvertToLinalgBufferOp;
 
-  LogicalResult apply(xla_hlo::SliceOp op, ArrayRef<Value> inputBuffers,
+  LogicalResult apply(mhlo::SliceOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
 }  // namespace
 
 LogicalResult SliceOpConversion::apply(
-    xla_hlo::SliceOp op, ArrayRef<Value> inputBuffers,
+    mhlo::SliceOp op, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
   auto loc = op.getLoc();
   auto argType = inputBuffers[0].getType().template dyn_cast<ShapedType>();
@@ -658,7 +658,7 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.torch_index_select conversion patterns.
+// mhlo.torch_index_select conversion patterns.
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -670,22 +670,21 @@
 /// graph. It is just a magic buffer outside operations.
 struct TorchIndexSelectOpConversion
     : public ConvertToLinalgBufferOp<TorchIndexSelectOpConversion,
-                                     xla_hlo::TorchIndexSelectOp> {
+                                     mhlo::TorchIndexSelectOp> {
   using ConvertToLinalgBufferOp<
       TorchIndexSelectOpConversion,
-      xla_hlo::TorchIndexSelectOp>::ConvertToLinalgBufferOp;
+      mhlo::TorchIndexSelectOp>::ConvertToLinalgBufferOp;
 
-  LogicalResult apply(xla_hlo::TorchIndexSelectOp op,
-                      ArrayRef<Value> inputBuffers,
+  LogicalResult apply(mhlo::TorchIndexSelectOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
 }  // namespace
 
 LogicalResult TorchIndexSelectOpConversion::apply(
-    xla_hlo::TorchIndexSelectOp op, ArrayRef<Value> inputBuffers,
+    mhlo::TorchIndexSelectOp op, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
-  xla_hlo::TorchIndexSelectOp::Adaptor adaptor(inputBuffers);
+  mhlo::TorchIndexSelectOp::Adaptor adaptor(inputBuffers);
   int axis = op.dim().getSExtValue();
   int batch = op.batch_dims().getSExtValue();
   auto indexShapeType = adaptor.index().getType().dyn_cast<ShapedType>();
@@ -742,12 +741,12 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.reduce_window conversion patterns and utility functions.
+// mhlo.reduce_window conversion patterns and utility functions.
 //===----------------------------------------------------------------------===//
 
 namespace {
 
-/// xla_hlo.reduce_window is mapped to a linalg.pooling operation. The type of
+/// mhlo.reduce_window is mapped to a linalg.pooling operation. The type of
 /// the pooling is determined based on the body of the reduce window
 /// operation. This class enumerates the different variants.
 enum class PoolingType {
@@ -758,12 +757,11 @@
 
 struct ReduceWindowOpConversion
     : public ConvertToLinalgBufferOp<ReduceWindowOpConversion,
-                                     xla_hlo::ReduceWindowOp> {
-  using ConvertToLinalgBufferOp<
-      ReduceWindowOpConversion,
-      xla_hlo::ReduceWindowOp>::ConvertToLinalgBufferOp;
+                                     mhlo::ReduceWindowOp> {
+  using ConvertToLinalgBufferOp<ReduceWindowOpConversion,
+                                mhlo::ReduceWindowOp>::ConvertToLinalgBufferOp;
 
-  LogicalResult apply(xla_hlo::ReduceWindowOp op, ArrayRef<Value> inputBuffers,
+  LogicalResult apply(mhlo::ReduceWindowOp op, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 };
@@ -776,15 +774,15 @@
   assert(block.getOperations().size() == 2 &&
          "expected the block has exactlly two operations");
   auto op = block.begin();
-  if (isa<xla_hlo::MinOp>(op)) return PoolingType::kMin;
-  if (isa<xla_hlo::MaxOp>(op)) return PoolingType::kMax;
-  if (isa<xla_hlo::AddOp>(op)) return PoolingType::kAdd;
+  if (isa<mhlo::MinOp>(op)) return PoolingType::kMin;
+  if (isa<mhlo::MaxOp>(op)) return PoolingType::kMax;
+  if (isa<mhlo::AddOp>(op)) return PoolingType::kAdd;
 
   llvm_unreachable("unknown pooling type");
 }
 
 LogicalResult ReduceWindowOpConversion::apply(
-    xla_hlo::ReduceWindowOp op, ArrayRef<Value> inputBuffers,
+    mhlo::ReduceWindowOp op, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
   auto loc = op.getLoc();
 
@@ -843,7 +841,7 @@
 }
 
 //===----------------------------------------------------------------------===//
-// xla_hlo.reduce conversion patterns and utility functions.
+// mhlo.reduce conversion patterns and utility functions.
 //===----------------------------------------------------------------------===//
 
 /// Returns a permutation AffineMap that puts all reduction dimensions to the
@@ -874,7 +872,7 @@
 
 namespace {
 
-/// Type converter for converting the region of an xla_hlo::reduce op.
+/// Type converter for converting the region of an mhlo::reduce op.
 class ReduceRegionTypeConverter : public TypeConverter {
  public:
   Type convertType(Type type) const {
@@ -887,15 +885,15 @@
   }
 };
 
-/// Converts the xla_hlo.reduce op on tensors to a linalg.indexed_generic op on
+/// Converts the mhlo.reduce op on tensors to a linalg.indexed_generic op on
 /// buffers. Expects that the reduce op is the only op within the dispatch
 /// function. This pattern also fuses std.constant operations which are defining
 /// ops of the init value with the linalg.indexed_generic op.
 struct ReduceOpConversion
-    : public ConvertToLinalgBufferOp<ReduceOpConversion, xla_hlo::ReduceOp> {
+    : public ConvertToLinalgBufferOp<ReduceOpConversion, mhlo::ReduceOp> {
   using ConvertToLinalgBufferOp<ReduceOpConversion,
-                                xla_hlo::ReduceOp>::ConvertToLinalgBufferOp;
-  LogicalResult apply(xla_hlo::ReduceOp reduceOp, ArrayRef<Value> inputBuffers,
+                                mhlo::ReduceOp>::ConvertToLinalgBufferOp;
+  LogicalResult apply(mhlo::ReduceOp reduceOp, ArrayRef<Value> inputBuffers,
                       ArrayRef<Value> resultBuffers,
                       ConversionPatternRewriter &rewriter) const;
 
@@ -940,13 +938,13 @@
   }
 };
 
-/// Converts xla_hlo.return to within a reduce region to a linalg.yield.
+/// Converts an mhlo.return within a reduce region to a linalg.yield.
 struct ReduceRegionReturnOpConversion final
     : public ReduceRegionOpConversion<ReduceRegionReturnOpConversion,
-                                      xla_hlo::ReturnOp> {
+                                      mhlo::ReturnOp> {
   using ReduceRegionOpConversion<ReduceRegionReturnOpConversion,
-                                 xla_hlo::ReturnOp>::ReduceRegionOpConversion;
-  static Operation *apply(xla_hlo::ReturnOp op, ArrayRef<Value> operands,
+                                 mhlo::ReturnOp>::ReduceRegionOpConversion;
+  static Operation *apply(mhlo::ReturnOp op, ArrayRef<Value> operands,
                           ConversionPatternRewriter &rewriter) {
     return rewriter.create<linalg::YieldOp>(op.getLoc(), operands[0]);
   }
@@ -954,7 +952,7 @@
 }  // namespace
 
 LogicalResult ReduceOpConversion::apply(
-    xla_hlo::ReduceOp reduceOp, ArrayRef<Value> inputBuffers,
+    mhlo::ReduceOp reduceOp, ArrayRef<Value> inputBuffers,
     ArrayRef<Value> resultBuffers, ConversionPatternRewriter &rewriter) const {
   if (reduceOp.getNumOperands() != 2) return failure();
   Value src = *reduceOp.operands().begin();
@@ -1388,9 +1386,9 @@
       TensorReshapeOpConversion, TorchIndexSelectOpConversion>(
       context, resultTensorToBufferMap);
   // Reduce region operation conversions.
-  patterns.insert<ReduceRegionXLAOpConversion<xla_hlo::AddOp>,
-                  ReduceRegionXLAOpConversion<xla_hlo::MinOp>,
-                  ReduceRegionXLAOpConversion<xla_hlo::MaxOp>,
+  patterns.insert<ReduceRegionXLAOpConversion<mhlo::AddOp>,
+                  ReduceRegionXLAOpConversion<mhlo::MinOp>,
+                  ReduceRegionXLAOpConversion<mhlo::MaxOp>,
                   ReduceRegionReturnOpConversion>(context);
 }
 
@@ -1413,7 +1411,7 @@
 
   ConversionTarget target(*context);
   // Make sure all XLA HLO ops are converted to Linalg ops after this pass.
-  target.addIllegalDialect<xla_hlo::XlaHloDialect>();
+  target.addIllegalDialect<mhlo::XlaHloDialect>();
   // All Linalg ops should operate on buffers. So hal.interface.*.tensor ops
   // should be gone.
   target.addIllegalOp<IREE::HAL::InterfaceLoadTensorOp,

diff --git a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnTensors.cpp b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnTensors.cpp
index a33e0ae..a7814a6 100644
--- a/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnTensors.cpp
+++ b/iree/compiler/Conversion/HLOToLinalg/HLOToLinalgOnTensors.cpp

@@ -54,12 +54,12 @@
       return isa<linalg::LinalgOp>(op.getOperation()->getParentOp());
     });
     // Don't convert the body of reduction ops.
-    target.addDynamicallyLegalDialect<xla_hlo::XlaHloDialect>(
+    target.addDynamicallyLegalDialect<mhlo::XlaHloDialect>(
         Optional<ConversionTarget::DynamicLegalityCallbackFn>(
             [](Operation* op) {
               auto parentOp = op->getParentRegion()->getParentOp();
-              return isa<xla_hlo::ReduceOp>(parentOp) ||
-                     isa<xla_hlo::ReduceWindowOp>(parentOp);
+              return isa<mhlo::ReduceOp>(parentOp) ||
+                     isa<mhlo::ReduceWindowOp>(parentOp);
             }));
     // Let the rest fall through.
     target.markUnknownOpDynamicallyLegal([](Operation*) { return true; });
@@ -74,7 +74,7 @@
 
 void populateHLOToLinalgOnTensorsConversionPatterns(
     MLIRContext* context, OwningRewritePatternList& patterns) {
-  xla_hlo::populateHLOToLinalgConversionPattern(context, &patterns);
+  mhlo::populateHLOToLinalgConversionPattern(context, &patterns);
 }
 
 std::unique_ptr<OperationPass<FuncOp>> createHLOToLinalgOnTensorsPass() {

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir b/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir
index 7e36b48..760983b 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/arithmetic_ops.mlir

@@ -3,7 +3,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @addf
 func @addf(%operand: tensor<2x2xf32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.add"(%operand, %operand)
+  %result = "mhlo.add"(%operand, %operand)
       : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
   return
 }
@@ -24,7 +24,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @addi
 func @addi(%operand: tensor<2x2xi32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.add"(%operand, %operand)
+  %result = "mhlo.add"(%operand, %operand)
       : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
   return
 }
@@ -46,7 +46,7 @@
 // CHECK: func @subf
 func @subf(%operand: tensor<2x2xf32>)
 attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.subtract"(%operand, %operand)
+  %result = "mhlo.subtract"(%operand, %operand)
       : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
   return
 }
@@ -67,7 +67,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @subi
 func @subi(%operand: tensor<2x2xi32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.subtract"(%operand, %operand)
+  %result = "mhlo.subtract"(%operand, %operand)
       : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
   return
 }
@@ -88,7 +88,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @mulf
 func @mulf(%operand: tensor<2x2xf32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.multiply"(%operand, %operand)
+  %result = "mhlo.multiply"(%operand, %operand)
       : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
   return
 }
@@ -109,7 +109,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @muli
 func @muli(%operand: tensor<2x2xi32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.multiply"(%operand, %operand)
+  %result = "mhlo.multiply"(%operand, %operand)
       : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
   return
 }
@@ -130,7 +130,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @divf
 func @divf(%operand: tensor<2x2xf32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.divide"(%operand, %operand)
+  %result = "mhlo.divide"(%operand, %operand)
       : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
   return
 }
@@ -151,7 +151,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @divi
 func @divi(%operand: tensor<2x2xi32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.divide"(%operand, %operand)
+  %result = "mhlo.divide"(%operand, %operand)
       : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32>
   return
 }

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir b/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir
index 21fcce3..af36509 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/concatenate.mlir

@@ -15,7 +15,7 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<2x2xi32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<2x3xi32>
-    %2 = "xla_hlo.concatenate"(%0, %1) {
+    %2 = "mhlo.concatenate"(%0, %1) {
       dimension = 1
     } : (tensor<2x2xi32>, tensor<2x3xi32>) -> tensor<2x5xi32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<2x5xi32>
@@ -45,7 +45,7 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<2x2xi32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<3x2xi32>
-    %2 = "xla_hlo.concatenate"(%0, %1) {
+    %2 = "mhlo.concatenate"(%0, %1) {
       dimension = 0
     } : (tensor<2x2xi32>, tensor<3x2xi32>) -> tensor<5x2xi32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5x2xi32>

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir b/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir
index db1dca6..9845ed9 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/conv.mlir

@@ -11,7 +11,7 @@
     // CHECK-SAME: padding = dense<[
     // CHECK-SAME:                  [0, 1], [0, 1]]> : tensor<2x2xi64>
     // CHECK-SAME: strides = [2, 1]}
-    %2 = "xla_hlo.convolution"(%1, %0) {
+    %2 = "mhlo.convolution"(%1, %0) {
       batch_group_count = 1 : i64,
       dimension_numbers = {
         input_batch_dimension = 0 : i64,

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir b/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
index f0753a1..a706486 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/decompose_hlo_clamp.mlir

@@ -3,11 +3,11 @@
 // CHECK-LABEL: func @clamp
 // CHECK-SAME: (%[[MIN:.+]]: tensor<4xf32>, %[[INPUT:.+]]: tensor<4xf32>, %[[MAX:.+]]: tensor<4xf32>)
 func @clamp(%min: tensor<4xf32>, %value: tensor<4xf32>, %max: tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK: %[[CMP_MIN:.+]] = "xla_hlo.compare"(%[[MIN]], %[[INPUT]]) {comparison_direction = "LT"}
-  // CHECK: %[[SLT_MIN:.+]] = "xla_hlo.select"(%[[CMP_MIN]], %[[INPUT]], %[[MIN]])
-  // CHECK: %[[CMP_MAX:.+]] = "xla_hlo.compare"(%[[SLT_MIN]], %[[MAX]]) {comparison_direction = "LT"}
-  // CHECK: %[[SLT_MAX:.+]] = "xla_hlo.select"(%[[CMP_MAX]], %[[SLT_MIN]], %[[MAX]])
+  // CHECK: %[[CMP_MIN:.+]] = "mhlo.compare"(%[[MIN]], %[[INPUT]]) {comparison_direction = "LT"}
+  // CHECK: %[[SLT_MIN:.+]] = "mhlo.select"(%[[CMP_MIN]], %[[INPUT]], %[[MIN]])
+  // CHECK: %[[CMP_MAX:.+]] = "mhlo.compare"(%[[SLT_MIN]], %[[MAX]]) {comparison_direction = "LT"}
+  // CHECK: %[[SLT_MAX:.+]] = "mhlo.select"(%[[CMP_MAX]], %[[SLT_MIN]], %[[MAX]])
   // CHECK: return %[[SLT_MAX]]
-  %0 = "xla_hlo.clamp"(%min, %value, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.clamp"(%min, %value, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   return %0 : tensor<4xf32>
 }

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/dot.mlir b/iree/compiler/Conversion/HLOToLinalg/test/dot.mlir
index 86458d2..142fce0 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/dot.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/dot.mlir

@@ -7,7 +7,7 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<2x3xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<3x2xf32>
     // CHECK: linalg.matmul %{{.+}}, %{{.+}}, %{{.+}} : (memref<2x3xf32>, memref<3x2xf32>, memref<2x2xf32>)
-    %result = "xla_hlo.dot"(%0, %1) : (tensor<2x3xf32>, tensor<3x2xf32>) -> tensor<2x2xf32>
+    %result = "mhlo.dot"(%0, %1) : (tensor<2x3xf32>, tensor<3x2xf32>) -> tensor<2x2xf32>
     hal.interface.store.tensor %result, @legacy_io::@ret0, offset = %c0 : tensor<2x2xf32>
     return
   }

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir b/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir
index d7de0be..fb0c143 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/dynamic_shape.mlir

@@ -4,7 +4,7 @@
 // CHECK: func @dynamic_shape
 func @dynamic_shape(%operand: tensor<?x?xf32>)
 attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.exponential"(%operand) : (tensor<?x?xf32>) -> tensor<?x?xf32>
+  %result = "mhlo.exponential"(%operand) : (tensor<?x?xf32>) -> tensor<?x?xf32>
   return
 }
 // CHECK: linalg.generic {

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir b/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir
index b0ac2d4..bcbb8de 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/exp.mlir

@@ -3,7 +3,7 @@
 // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 // CHECK: func @exp
 func @exp(%operand: tensor<2x2xf32>) attributes {iree.dispatch_fn_name = ""} {
-  %result = "xla_hlo.exponential"(%operand) : (tensor<2x2xf32>) -> tensor<2x2xf32>
+  %result = "mhlo.exponential"(%operand) : (tensor<2x2xf32>) -> tensor<2x2xf32>
   return
 }
 // CHECK: linalg.generic {

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir b/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir
index 9361f9e..a167ef4 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/pad.mlir

@@ -7,7 +7,7 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
     // CHECK: linalg.indexed_generic
     %1 = constant dense<0.0> : tensor<f32>
-    %2 = "xla_hlo.pad"(%0, %1) {
+    %2 = "mhlo.pad"(%0, %1) {
       edge_padding_high = dense<[2, 3]> : tensor<2xi64>,
       edge_padding_low = dense<[4, 5]> : tensor<2xi64>,
       interior_padding = dense<0> : tensor<2xi64>
@@ -30,7 +30,7 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
     // CHECK: linalg.indexed_generic
-    %2 = "xla_hlo.pad"(%0, %1) {
+    %2 = "mhlo.pad"(%0, %1) {
       edge_padding_high = dense<[2, 3]> : tensor<2xi64>,
       edge_padding_low = dense<[4, 5]> : tensor<2xi64>,
       interior_padding = dense<0> : tensor<2xi64>
@@ -54,7 +54,7 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<12x4xf32>
     // CHECK: linalg.indexed_generic
     %1 = constant dense<0.0> : tensor<f32>
-    %2 = "xla_hlo.pad"(%0, %1) {
+    %2 = "mhlo.pad"(%0, %1) {
       edge_padding_high = dense<0> : tensor<2xi64>,
       edge_padding_low = dense<0> : tensor<2xi64>,
       interior_padding = dense<0> : tensor<2xi64>

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/pw_linalg_fusion.mlir b/iree/compiler/Conversion/HLOToLinalg/test/pw_linalg_fusion.mlir
index 06d92ba..b65cf46 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/pw_linalg_fusion.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/pw_linalg_fusion.mlir

@@ -10,8 +10,8 @@
   // CHECK: %[[TEMP:[a-zA-Z0-9$._-]+]] = muli %[[ARG0]], %[[ARG1]]
   // CHECK: addi %[[TEMP]], %[[ARG2]]
   // CHECK-NOT: linalg.generic
-  %4 = "xla_hlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %5 = "xla_hlo.add"(%4, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %4 = "mhlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %5 = "mhlo.add"(%4, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
   return %5 : tensor<4x8xi32>
 }
 
@@ -29,9 +29,9 @@
   // CHECK: %[[TEMP2:[a-zA-Z0-9$._-]+]] = addi %[[TEMP1]], %[[ARG2]]
   // CHECK: subi %[[TEMP2]], %[[ARG3]]
   // CHECK-NOT: linalg.generic
-  %4 = "xla_hlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %5 = "xla_hlo.add"(%4, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %6 = "xla_hlo.subtract"(%5, %arg3) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %4 = "mhlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %5 = "mhlo.add"(%4, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %6 = "mhlo.subtract"(%5, %arg3) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
   return %6: tensor<4x8xi32>
 }
 
@@ -49,9 +49,9 @@
   // CHECK-DAG: %[[TEMP2:[a-zA-Z0-9$._-]+]] = addi %[[ARG2]], %[[ARG3]]
   // CHECK: subi %[[TEMP1]], %[[TEMP2]]
   // CHECK-NOT: linalg.generic
-  %4 = "xla_hlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %5 = "xla_hlo.add"(%arg2, %arg3) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %6 = "xla_hlo.subtract"(%4, %5) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %4 = "mhlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %5 = "mhlo.add"(%arg2, %arg3) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %6 = "mhlo.subtract"(%4, %5) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
   return %6: tensor<4x8xi32>
 }
 
@@ -69,8 +69,8 @@
   // CHECK-DAG: %[[TEMP2:[a-zA-Z0-9$._-]+]] = addi %[[ARG2]], %[[ARG3]]
   // CHECK: subi %[[TEMP1]], %[[TEMP2]]
   // CHECK-NOT: linalg.generic
-  %3 = "xla_hlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %4 = "xla_hlo.add"(%arg0, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
-  %5 = "xla_hlo.subtract"(%3, %4) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %3 = "mhlo.multiply"(%arg0, %arg1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %4 = "mhlo.add"(%arg0, %arg2) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %5 = "mhlo.subtract"(%3, %4) :  (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
   return %5: tensor<4x8xi32>
 }

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir b/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir
index f1e483c..f639092 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/reduce.mlir

@@ -22,10 +22,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.add %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<5x4xf32>, tensor<f32>) -> tensor<5xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
     return
@@ -46,10 +46,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.minimum %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.minimum %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<5x4xf32>, tensor<f32>) -> tensor<5xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
     return
@@ -70,10 +70,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.maximum %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.maximum %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<5x4xf32>, tensor<f32>) -> tensor<5xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<5xf32>
     return
@@ -94,10 +94,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.maximum %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.maximum %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5x4xf32>, tensor<f32>) -> tensor<4xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<4xf32>
     return
@@ -133,10 +133,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.add %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5x4xf32>, tensor<f32>) -> tensor<4xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<4xf32>
     return
@@ -162,10 +162,10 @@
     // CHECK: ^{{.+}}(%{{.+}}: index, %[[DIM:.+]]: index, %{{.+}}: f32, %[[OUTPUT:.+]]: f32):
     // CHECK: select %{{.+}}, %[[CST]], %[[OUTPUT]] : f32
     %cst = constant dense<0xFF800000> : tensor<f32>
-    %1 = "xla_hlo.reduce"(%0, %cst) ({
+    %1 = "mhlo.reduce"(%0, %cst) ({
     ^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>): // no predecessors
-      %2 = xla_hlo.add %arg2, %arg3 {name = "maximum.21"} : tensor<f32>
-      "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+      %2 = mhlo.add %arg2, %arg3 {name = "maximum.21"} : tensor<f32>
+      "mhlo.return"(%2) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
     hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<1xf32>
     return
@@ -205,10 +205,10 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x4x3xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
-    %2 = "xla_hlo.reduce"(%0, %1) ({
+    %2 = "mhlo.reduce"(%0, %1) ({
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.add %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {dimensions = dense<[0, 2]> : tensor<2xi64>} : (tensor<5x4x3xf32>, tensor<f32>) -> tensor<4xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<4xf32>
     return

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/reduce_window.mlir b/iree/compiler/Conversion/HLOToLinalg/test/reduce_window.mlir
index ef1d51d..ab080e0 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/reduce_window.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/reduce_window.mlir

@@ -6,10 +6,10 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x18x18x64xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
     // CHECK: linalg.pooling_min
-    %2 = "xla_hlo.reduce_window"(%0, %1) ( {
+    %2 = "mhlo.reduce_window"(%0, %1) ( {
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.minimum %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.minimum %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {window_dimensions = dense<[1, 3, 3, 1]> : tensor<4xi64>,
         window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>} : (tensor<1x18x18x64xf32>, tensor<f32>) -> tensor<1x8x8x64xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<1x8x8x64xf32>
@@ -30,10 +30,10 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x18x18x64xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
     // CHECK: linalg.pooling_max
-    %2 = "xla_hlo.reduce_window"(%0, %1) ( {
+    %2 = "mhlo.reduce_window"(%0, %1) ( {
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.maximum %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.maximum %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {window_dimensions = dense<[1, 3, 3, 1]> : tensor<4xi64>,
         window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>} : (tensor<1x18x18x64xf32>, tensor<f32>) -> tensor<1x8x8x64xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<1x8x8x64xf32>
@@ -54,10 +54,10 @@
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<1x18x18x64xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<f32>
     // CHECK: linalg.pooling_sum
-    %2 = "xla_hlo.reduce_window"(%0, %1) ( {
+    %2 = "mhlo.reduce_window"(%0, %1) ( {
     ^bb0(%arg3: tensor<f32>, %arg4 : tensor<f32>):
-      %3 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = mhlo.add %arg3, %arg4 : tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
     }) {window_dimensions = dense<[1, 3, 3, 1]> : tensor<4xi64>,
         window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>} : (tensor<1x18x18x64xf32>, tensor<f32>) -> tensor<1x8x8x64xf32>
     hal.interface.store.tensor %2, @legacy_io::@ret0, offset = %c0 : tensor<1x8x8x64xf32>

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/slice.mlir b/iree/compiler/Conversion/HLOToLinalg/test/slice.mlir
index 5e5c6b6..300c0ff 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/slice.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/slice.mlir

@@ -7,7 +7,7 @@
   func @slice_whole_buffer() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<3x4xi32>
-    %1 = "xla_hlo.slice"(%0) {
+    %1 = "mhlo.slice"(%0) {
       start_indices = dense<[0, 0]> : tensor<2xi64>,
       limit_indices = dense<[3, 4]> : tensor<2xi64>,
       strides = dense<1> : tensor<2xi64>
@@ -41,7 +41,7 @@
   func @slice_whole_stride() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<3x4xi32>
-    %1 = "xla_hlo.slice"(%0) {
+    %1 = "mhlo.slice"(%0) {
       start_indices = dense<[1, 0]> : tensor<2xi64>,
       limit_indices = dense<[2, 4]> : tensor<2xi64>,
       strides = dense<1> : tensor<2xi64>
@@ -75,7 +75,7 @@
   func @slice_stride_part() {
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<3x4xi32>
-    %1 = "xla_hlo.slice"(%0) {
+    %1 = "mhlo.slice"(%0) {
       start_indices = dense<[1, 1]> : tensor<2xi64>,
       limit_indices = dense<[2, 3]> : tensor<2xi64>,
       strides = dense<1> : tensor<2xi64>

diff --git a/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir b/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir
index 5aceac4..1cf33d1 100644
--- a/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir
+++ b/iree/compiler/Conversion/HLOToLinalg/test/torch_index_select.mlir

@@ -23,7 +23,7 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<5x1x5xi32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<2xi32>
-    %2 = "xla_hlo.torch_index_select"(%0, %1) {
+    %2 = "mhlo.torch_index_select"(%0, %1) {
       dim = 0 : i64,
       batch_dims = 0 : i64
     } : (tensor<5x1x5xi32>, tensor<2xi32>) -> tensor<2x1x5xi32>
@@ -61,7 +61,7 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<4x8xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<i32>
-    %2 = "xla_hlo.torch_index_select"(%0, %1) {
+    %2 = "mhlo.torch_index_select"(%0, %1) {
       batch_dims = 0 : i64,
       dim = 0 : i64
     } : (tensor<4x8xf32>, tensor<i32>) -> tensor<8xf32>
@@ -99,7 +99,7 @@
     %c0 = constant 0 : index
     %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<4x7x8x2xf32>
     %1 = hal.interface.load.tensor @legacy_io::@arg1, offset = %c0 : tensor<4x1xi32>
-    %2 = "xla_hlo.torch_index_select"(%0, %1) {
+    %2 = "mhlo.torch_index_select"(%0, %1) {
       dim = 2 : i64,
       batch_dims = 1 : i64
     } : (tensor<4x7x8x2xf32>, tensor<4x1xi32>) -> tensor<4x7x1x2xf32>

diff --git a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
index a6640a5..16a05ef 100644
--- a/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp
+++ b/iree/compiler/Dialect/Flow/Analysis/Dispatchability.cpp

@@ -115,11 +115,10 @@
         // TODO(benvanik): widen to all known terminators? sometimes they may
         // have side-effects.
         continue;
-      } else if (isa<xla_hlo::DotOp>(op) || isa<xla_hlo::ConvOp>(op)) {
+      } else if (isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op)) {
         // Some unfusable ops must remain on their own.
         return false;
-      } else if (isa<xla_hlo::ReduceOp>(op) ||
-                 isa<xla_hlo::ReduceWindowOp>(op)) {
+      } else if (isa<mhlo::ReduceOp>(op) || isa<mhlo::ReduceWindowOp>(op)) {
         // Reductions always become flow ops.
         return false;
 

diff --git a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir b/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
index f2e8728..9ccefe3 100644
--- a/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir
+++ b/iree/compiler/Dialect/Flow/Analysis/test/dispatchability.mlir

@@ -20,7 +20,7 @@
 // CHECK-LABEL: @simpleMath
 // CHECK-SAME: dispatchable = true
 func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   return %0 : tensor<4xf32>
 }
 
@@ -40,9 +40,9 @@
 // CHECK-LABEL: @hloElementwiseOps
 // CHECK-SAME: dispatchable = true
 func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = xla_hlo.subtract %0, %arg0 : tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
+  %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   return %2 : tensor<4xf32>
 }
 
@@ -51,9 +51,9 @@
 // CHECK-LABEL: @interleavedDot
 // CHECK-SAME: dispatchable = false
 func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = "xla_hlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4x4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
+  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
   return %2 : tensor<4x4xf32>
 }
 
@@ -62,15 +62,15 @@
 // CHECK-LABEL: @caller
 // CHECK-SAME: dispatchable = true
 func @caller(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   return %2 : tensor<4xf32>
 }
 // CHECK-LABEL: func @callee
 // CHECK-SAME: dispatchable = true
 func @callee(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+  %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
   return %0 : tensor<4xf32>
 }
 
@@ -79,14 +79,14 @@
 // CHECK-LABEL: @dotCaller
 // CHECK-SAME: dispatchable = false
 func @dotCaller(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
   %1 = call @dotCallee(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4x4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
   return %2 : tensor<4x4xf32>
 }
 // CHECK-LABEL: func @dotCallee
 // CHECK-SAME: dispatchable = false
 func @dotCallee(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = "xla_hlo.dot"(%arg0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  %0 = "mhlo.dot"(%arg0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
   return %0 : tensor<4x4xf32>
 }

diff --git a/iree/compiler/Dialect/Flow/Conversion/HLOToFlow/ConvertHLOToFlow.cpp b/iree/compiler/Dialect/Flow/Conversion/HLOToFlow/ConvertHLOToFlow.cpp
index 11d13e5..0eb58e5 100644
--- a/iree/compiler/Dialect/Flow/Conversion/HLOToFlow/ConvertHLOToFlow.cpp
+++ b/iree/compiler/Dialect/Flow/Conversion/HLOToFlow/ConvertHLOToFlow.cpp

@@ -30,9 +30,9 @@
 
 namespace {
 
-struct ConstOpLowering : public OpRewritePattern<xla_hlo::ConstOp> {
+struct ConstOpLowering : public OpRewritePattern<mhlo::ConstOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::ConstOp op,
+  LogicalResult matchAndRewrite(mhlo::ConstOp op,
                                 PatternRewriter &rewriter) const override {
     rewriter.replaceOpWithNewOp<ConstantOp>(op, op.value());
     return success();
@@ -40,9 +40,9 @@
 };
 
 struct DynamicUpdateSliceOpLowering
-    : public OpRewritePattern<xla_hlo::DynamicUpdateSliceOp> {
+    : public OpRewritePattern<mhlo::DynamicUpdateSliceOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::DynamicUpdateSliceOp op,
+  LogicalResult matchAndRewrite(mhlo::DynamicUpdateSliceOp op,
                                 PatternRewriter &rewriter) const override {
     auto startIndices = llvm::to_vector<4>(
         llvm::map_range(op.start_indices(), [&](Value tensorValue) {
@@ -61,8 +61,7 @@
 
 void setupDirectHLOToFlowLegality(MLIRContext *context,
                                   ConversionTarget &conversionTarget) {
-  conversionTarget
-      .addIllegalOp<xla_hlo::ConstOp, xla_hlo::DynamicUpdateSliceOp>();
+  conversionTarget.addIllegalOp<mhlo::ConstOp, mhlo::DynamicUpdateSliceOp>();
 }
 
 void populateHLOToFlowPatterns(MLIRContext *context,

diff --git a/iree/compiler/Dialect/Flow/IR/test/stream_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/stream_folding.mlir
index 9afe440..b769f7b 100644
--- a/iree/compiler/Dialect/Flow/IR/test/stream_folding.mlir
+++ b/iree/compiler/Dialect/Flow/IR/test/stream_folding.mlir

@@ -6,7 +6,7 @@
   flow.dispatch.entry @rgn_dispatch_0
   module {
     func @rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }

diff --git a/iree/compiler/Dialect/Flow/IR/test/stream_ops.mlir b/iree/compiler/Dialect/Flow/IR/test/stream_ops.mlir
index ebb3f45..aeb3f55 100644
--- a/iree/compiler/Dialect/Flow/IR/test/stream_ops.mlir
+++ b/iree/compiler/Dialect/Flow/IR/test/stream_ops.mlir

@@ -6,7 +6,7 @@
   flow.dispatch.entry @rgn_dispatch_0
   module {
     func @rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }

diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
index cf02a3f..9f40dcf 100644
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/DispatchConfig.cpp

@@ -32,11 +32,10 @@
 // TODO(laurenzo): Every one of these should have better support and removed
 // from this exclusion list eventually.
 bool isUnsupportedFusionOp(Operation *op) {
-  return isa<xla_hlo::DotOp>(op) || isa<xla_hlo::ConvOp>(op) ||
-         isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::PadOp>(op) ||
-         isa<xla_hlo::ReduceWindowOp>(op) ||
-         isa<xla_hlo::TorchIndexSelectOp>(op) || isa<xla_hlo::SliceOp>(op) ||
-         isa<xla_hlo::ConcatenateOp>(op);
+  return isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op) ||
+         isa<mhlo::ReduceOp>(op) || isa<mhlo::PadOp>(op) ||
+         isa<mhlo::ReduceWindowOp>(op) || isa<mhlo::TorchIndexSelectOp>(op) ||
+         isa<mhlo::SliceOp>(op) || isa<mhlo::ConcatenateOp>(op);
 }
 
 // Allowlist of ops that materialize to a an index-permuted copy of some kind
@@ -46,11 +45,11 @@
   // TODO(laurenzo): Curate this list more specifically (or have a better
   // mechanism for determining).
   return isa<Shape::RankedBroadcastInDimOp>(op) ||
-         isa<xla_hlo::BroadcastInDimOp>(op) ||
-         isa<xla_hlo::DynamicBroadcastInDimOp>(op) ||
-         isa<xla_hlo::DynamicReshapeOp>(op) ||
-         isa<xla_hlo::DynamicSliceOp>(op) || isa<xla_hlo::ReshapeOp>(op) ||
-         isa<xla_hlo::SliceOp>(op) || isa<xla_hlo::TransposeOp>(op);
+         isa<mhlo::BroadcastInDimOp>(op) ||
+         isa<mhlo::DynamicBroadcastInDimOp>(op) ||
+         isa<mhlo::DynamicReshapeOp>(op) || isa<mhlo::DynamicSliceOp>(op) ||
+         isa<mhlo::ReshapeOp>(op) || isa<mhlo::SliceOp>(op) ||
+         isa<mhlo::TransposeOp>(op);
 }
 }  // namespace
 
@@ -123,7 +122,7 @@
     // We generally do not want to form anchors around ops that just do a copy
     // (perhaps with an affine map) except as a last resort.
     return 1;
-  } else if (isa<xla_hlo::SelectOp>(op)) {
+  } else if (isa<mhlo::SelectOp>(op)) {
     // TODO(#2050): In a number of cases, this makes it less likely to split
     // a DR across a compare/select boundary. Remove this once i1 is legalized
     // properly.

diff --git a/iree/compiler/Dialect/Flow/Transforms/FlattenTuplesInCFG.cpp b/iree/compiler/Dialect/Flow/Transforms/FlattenTuplesInCFG.cpp
index b2379dc..d6bfd26 100644
--- a/iree/compiler/Dialect/Flow/Transforms/FlattenTuplesInCFG.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/FlattenTuplesInCFG.cpp

@@ -55,7 +55,7 @@
     values.push_back(processTuple(subtype, loc, block, builder));
   }
 
-  return builder.create<xla_hlo::TupleOp>(loc, tupleType, values);
+  return builder.create<mhlo::TupleOp>(loc, tupleType, values);
 }
 
 void copyOperationAttrs(Operation *oldOp, Operation *newOp) {
@@ -84,7 +84,7 @@
   for (int i = 0; i < tupleType.size(); i++) {
     auto subType = tupleType.getType(i);
 
-    auto elementOp = builder.create<xla_hlo::GetTupleElementOp>(
+    auto elementOp = builder.create<mhlo::GetTupleElementOp>(
         loc, subType, value, builder.getI32IntegerAttr(i));
     recursiveUntuple(elementOp.getResult(), loc, builder, mapping, newValues);
   }
@@ -106,8 +106,7 @@
     subValues.push_back(recursiveRetuple(subtype, values, builder, loc));
   }
 
-  return builder.create<xla_hlo::TupleOp>(loc, tupleType, subValues)
-      .getResult();
+  return builder.create<mhlo::TupleOp>(loc, tupleType, subValues).getResult();
 }
 
 template <typename T>

diff --git a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
index a2d4197..78e867d 100644
--- a/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/FoldCompatibleDispatchRegions.cpp

@@ -197,10 +197,10 @@
   for (auto &block : regionOp.body().getBlocks()) {
     for (auto &op : block) {
       // TODO(b/144530470): replace with tablegen attributes/interfaces.
-      if (isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::DotOp>(op) ||
-          isa<xla_hlo::ConvOp>(op) || isa<xla_hlo::ReduceWindowOp>(op) ||
-          isa<xla_hlo::PadOp>(op) || isa<xla_hlo::TorchIndexSelectOp>(op) ||
-          isa<xla_hlo::SliceOp>(op) || isa<xla_hlo::ConcatenateOp>(op)) {
+      if (isa<mhlo::ReduceOp>(op) || isa<mhlo::DotOp>(op) ||
+          isa<mhlo::ConvOp>(op) || isa<mhlo::ReduceWindowOp>(op) ||
+          isa<mhlo::PadOp>(op) || isa<mhlo::TorchIndexSelectOp>(op) ||
+          isa<mhlo::SliceOp>(op) || isa<mhlo::ConcatenateOp>(op)) {
         return false;
       }
     }

diff --git a/iree/compiler/Dialect/Flow/Transforms/HLOToHLOPreprocessing.cpp b/iree/compiler/Dialect/Flow/Transforms/HLOToHLOPreprocessing.cpp
index 8e9767e..e591f84 100644
--- a/iree/compiler/Dialect/Flow/Transforms/HLOToHLOPreprocessing.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/HLOToHLOPreprocessing.cpp

@@ -49,12 +49,11 @@
                       [](APInt v) -> bool { return !v.isNullValue(); });
 }
 
-class ExtractConvOpPaddingAttributes
-    : public OpRewritePattern<xla_hlo::ConvOp> {
+class ExtractConvOpPaddingAttributes : public OpRewritePattern<mhlo::ConvOp> {
  public:
-  using OpRewritePattern<xla_hlo::ConvOp>::OpRewritePattern;
+  using OpRewritePattern<mhlo::ConvOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(xla_hlo::ConvOp op,
+  LogicalResult matchAndRewrite(mhlo::ConvOp op,
                                 PatternRewriter &rewriter) const override {
     if (!hasPadding(op)) return failure();
     auto inputType = op.lhs().getType().cast<ShapedType>();
@@ -71,7 +70,7 @@
       paddingHigh[dim] = op.paddingAttr().getValue<int64_t>({idx, 1});
     }
     for (unsigned i = 0; i < rank; ++i) {
-      // xla_hlo.pad doesn't support dynamic shape.
+      // mhlo.pad doesn't support dynamic shape.
       if (inputType.isDynamicDim(i)) return failure();
       int size = inputType.getShape()[i];
       shape.push_back(size + paddingLow[i] + paddingHigh[i]);
@@ -89,11 +88,11 @@
     Attribute zeroAttr = rewriter.getZeroAttr(
         RankedTensorType::get({}, inputType.getElementType()));
     auto zero = rewriter.create<ConstantOp>(loc, zeroAttr);
-    auto padOp = rewriter.create<xla_hlo::PadOp>(
+    auto padOp = rewriter.create<mhlo::PadOp>(
         loc, padResultType, op.lhs(), zero, toDenseAttr(paddingLow),
         toDenseAttr(paddingHigh), toDenseAttr(interiorPadding));
     auto resultType = op.getResult().getType();
-    auto newOp = rewriter.create<xla_hlo::ConvOp>(
+    auto newOp = rewriter.create<mhlo::ConvOp>(
         op.getLoc(), resultType, padOp.getResult(), op.rhs(),
         op.window_stridesAttr(), /*padding=*/nullptr, op.lhs_dilationAttr(),
         op.rhs_dilationAttr(), op.dimension_numbersAttr(),
@@ -105,11 +104,11 @@
 };
 
 class ExtractReduceWindowOpPaddingAttributes
-    : public OpRewritePattern<xla_hlo::ReduceWindowOp> {
+    : public OpRewritePattern<mhlo::ReduceWindowOp> {
  public:
-  using OpRewritePattern<xla_hlo::ReduceWindowOp>::OpRewritePattern;
+  using OpRewritePattern<mhlo::ReduceWindowOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(xla_hlo::ReduceWindowOp op,
+  LogicalResult matchAndRewrite(mhlo::ReduceWindowOp op,
                                 PatternRewriter &rewriter) const override {
     if (!op.padding()) return failure();
     if (op.base_dilations() || op.window_dilations()) return failure();
@@ -119,7 +118,7 @@
     int rank = inputType.getRank();
     SmallVector<int64_t, 4> paddingLow, paddingHigh, interiorPadding, shape;
     for (unsigned i = 0; i < rank; ++i) {
-      // xla_hlo.pad doesn't support dynamic shape.
+      // mhlo.pad doesn't support dynamic shape.
       if (inputType.isDynamicDim(i)) return failure();
       interiorPadding.push_back(0);
       paddingLow.push_back(op.paddingAttr().getValue<int64_t>({i, 0}));
@@ -137,11 +136,11 @@
     auto loc = op.getLoc();
     auto padResultType =
         RankedTensorType::get(shape, inputType.getElementType());
-    auto padOp = rewriter.create<xla_hlo::PadOp>(
+    auto padOp = rewriter.create<mhlo::PadOp>(
         loc, padResultType, op.operand(), op.init_value(),
         toDenseAttr(paddingLow), toDenseAttr(paddingHigh),
         toDenseAttr(interiorPadding));
-    auto newOp = rewriter.create<xla_hlo::ReduceWindowOp>(
+    auto newOp = rewriter.create<mhlo::ReduceWindowOp>(
         loc, op.getResult().getType(), padOp, op.init_value(),
         op.window_dimensions(), op.window_stridesAttr(),
         op.base_dilationsAttr(), op.window_dilationsAttr(),
@@ -152,12 +151,12 @@
   }
 };
 
-// Adjust the shape of depthwise_conv filter where is applied by xla_hlo.
-class AdjustDepthwiseFilterShape : public OpRewritePattern<xla_hlo::ConvOp> {
+// Adjust the shape of depthwise_conv filter where is applied by mhlo.
+class AdjustDepthwiseFilterShape : public OpRewritePattern<mhlo::ConvOp> {
  public:
-  using OpRewritePattern<xla_hlo::ConvOp>::OpRewritePattern;
+  using OpRewritePattern<mhlo::ConvOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(xla_hlo::ConvOp op,
+  LogicalResult matchAndRewrite(mhlo::ConvOp op,
                                 PatternRewriter &rewriter) const override {
     const auto featureInDim =
         op.dimension_numbers().kernel_input_feature_dimension().getInt();
@@ -175,12 +174,12 @@
     newShape[featureOutDim] /= groupCount;
     auto loc = op.getLoc();
     auto elemType = op.rhs().getType().cast<ShapedType>().getElementType();
-    auto reshapeOp = rewriter.create<xla_hlo::ReshapeOp>(
+    auto reshapeOp = rewriter.create<mhlo::ReshapeOp>(
         loc, RankedTensorType::get(newShape, elemType), op.rhs());
     auto resultType = op.getResult().getType();
     SmallVector<Value, 2> operands = {op.lhs(), reshapeOp.getResult()};
-    auto newOp = rewriter.create<xla_hlo::ConvOp>(op.getLoc(), resultType,
-                                                  operands, op.getAttrs());
+    auto newOp = rewriter.create<mhlo::ConvOp>(op.getLoc(), resultType,
+                                               operands, op.getAttrs());
     rewriter.replaceOp(op, newOp.getResult());
     return success();
   }
@@ -191,7 +190,7 @@
   void runOnFunction() override {
     MLIRContext *context = &getContext();
     OwningRewritePatternList patterns;
-    xla_hlo::PopulateUnfuseBatchNormPatterns(context, &patterns);
+    mhlo::PopulateUnfuseBatchNormPatterns(context, &patterns);
     // Note that various input modalities may do their own legalization of
     // CHLO. Converting here allows IREE to accept CHLO dialect regardless of
     // whether it was legalized away at a higher level.

diff --git a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions.cpp b/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions.cpp
index e589aea..dc30ec3 100644
--- a/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/IdentifyDispatchRegions.cpp

@@ -116,10 +116,10 @@
 // Preconditions: isDispatchableOp(op) == true.
 bool isFusionRootOp(Operation *op) {
   // TODO(b/144530470): replace with tablegen attributes/interfaces.
-  // TODO(#1605): Remove xla_hlo::PadOp from the check.
-  if (isa<xla_hlo::DotOp>(op) || isa<xla_hlo::ConvOp>(op) ||
-      isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::PadOp>(op) ||
-      isa<xla_hlo::ReduceWindowOp>(op)) {
+  // TODO(#1605): Remove mhlo::PadOp from the check.
+  if (isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op) ||
+      isa<mhlo::ReduceOp>(op) || isa<mhlo::PadOp>(op) ||
+      isa<mhlo::ReduceWindowOp>(op)) {
     // We have hand-written kernels for these right now we want to stand alone.
     // When we do a bit more magic we should allow these ops to fold.
     LLVM_DEBUG(llvm::dbgs() << "  NOT A FUSION ROOT (Special Op): "
@@ -149,14 +149,14 @@
 // Preconditions: isDispatchableOp(op) == true.
 bool isFusableOp(Operation *op) {
   // TODO(b/144530470): replace with tablegen attributes/interfaces.
-  if (isa<xla_hlo::DotOp>(op) || isa<xla_hlo::ConvOp>(op)) {
+  if (isa<mhlo::DotOp>(op) || isa<mhlo::ConvOp>(op)) {
     return false;
-  } else if (isa<xla_hlo::ReduceOp>(op) || isa<xla_hlo::ReduceWindowOp>(op)) {
+  } else if (isa<mhlo::ReduceOp>(op) || isa<mhlo::ReduceWindowOp>(op)) {
     // Reduction is usually a dedicated root operation - we can shove things in
     // the front of it but not behind.
     return false;
-  } else if (isa<xla_hlo::PadOp>(op)) {
-    // TODO(#1605): Remove xla_hlo::PadOp from the check.
+  } else if (isa<mhlo::PadOp>(op)) {
+    // TODO(#1605): Remove mhlo::PadOp from the check.
     return false;
   }
   return true;

diff --git a/iree/compiler/Dialect/Flow/Transforms/LegalizeInputTypes.cpp b/iree/compiler/Dialect/Flow/Transforms/LegalizeInputTypes.cpp
index 48bb2b0..0b5d964 100644
--- a/iree/compiler/Dialect/Flow/Transforms/LegalizeInputTypes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/LegalizeInputTypes.cpp

@@ -90,8 +90,7 @@
     }
   }
 
-  if (llvm::isa<mlir::ConstantOp>(oldOp) ||
-      llvm::isa<xla_hlo::ConstOp>(oldOp)) {
+  if (llvm::isa<mlir::ConstantOp>(oldOp) || llvm::isa<mhlo::ConstOp>(oldOp)) {
     // Deal with all value-based constant ops generically.
     Attribute oldValue = oldOp->getAttr("value");
     auto newValue = convertAttribute(oldOp->getLoc(), oldValue, typeConverter);

diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index d3783b7..636ddd4 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.cpp

@@ -39,7 +39,7 @@
   passManager.addPass(createCanonicalizerPass());
 
   // Flatten structured control flow to our CFG.
-  passManager.addNestedPass<FuncOp>(xla_hlo::createLegalizeControlFlowPass());
+  passManager.addNestedPass<FuncOp>(mhlo::createLegalizeControlFlowPass());
   passManager.addPass(createHLOPreprocessingPass());
 
   // Run passes to remove shape constraints. HLO lowering inserts them, but they

diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h
index 7b992d5..0ce0551 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h

@@ -60,7 +60,7 @@
 std::unique_ptr<OperationPass<ModuleOp>> createLegalizeInputTypesPass();
 
 /// Creates XLA-HLO preprocessing transformation pass. In this pass we should
-/// have all xla_hlo -> xla_hlo transformations that are shared between all
+/// have all mhlo -> mhlo transformations that are shared between all
 /// backends.
 std::unique_ptr<OperationPass<FuncOp>> createHLOPreprocessingPass();
 

diff --git a/iree/compiler/Dialect/Flow/Transforms/PrePostPartitioningConversion.cpp b/iree/compiler/Dialect/Flow/Transforms/PrePostPartitioningConversion.cpp
index 1d08bd3..ccc47e4 100644
--- a/iree/compiler/Dialect/Flow/Transforms/PrePostPartitioningConversion.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/PrePostPartitioningConversion.cpp

@@ -50,7 +50,7 @@
     }
     Location loc = op.getLoc();
     auto i8Type = rewriter.getIntegerType(8);
-    auto i8Operand = rewriter.create<xla_hlo::ConvertOp>(loc, args[0], i8Type);
+    auto i8Operand = rewriter.create<mhlo::ConvertOp>(loc, args[0], i8Type);
     auto loadOp =
         rewriter.create<ExtractElementOp>(loc, i8Type, i8Operand, op.indices());
     auto i1Type = rewriter.getI1Type();
@@ -78,18 +78,16 @@
     conversionTarget.addLegalOp<FuncOp>();
 
     // Allow XLA HLO ops - we explicitly mark the ones we don't want below.
-    conversionTarget.addLegalDialect<xla_hlo::XlaHloDialect>();
+    conversionTarget.addLegalDialect<mhlo::XlaHloDialect>();
 
     // Control flow must be converted to standard form via
-    // xla_hlo::createLegalizeControlFlowPass() prior to conversion.
-    conversionTarget
-        .addIllegalOp<xla_hlo::IfOp, xla_hlo::CaseOp, xla_hlo::WhileOp>();
+    // mhlo::createLegalizeControlFlowPass() prior to conversion.
+    conversionTarget.addIllegalOp<mhlo::IfOp, mhlo::CaseOp, mhlo::WhileOp>();
 
     // We don't support broadcast_dimensions as part of ops, so materialize
-    // any such attributes to dedicated xla_hlo.broadcast_in_dim ops.
-    xla_hlo::SetupMaterializeBroadcastsLegality(context, &conversionTarget);
-    xla_hlo::PopulateMaterializeBroadcastsPatterns(context,
-                                                   &conversionPatterns);
+    // any such attributes to dedicated mhlo.broadcast_in_dim ops.
+    mhlo::SetupMaterializeBroadcastsLegality(context, &conversionTarget);
+    mhlo::PopulateMaterializeBroadcastsPatterns(context, &conversionPatterns);
 
     // Early conversion of ops that have matches we want to route through.
     // For example, DynamicUpdateSlice should end up as a stream operation.

diff --git a/iree/compiler/Dialect/Flow/Transforms/RematerializeDispatchConstants.cpp b/iree/compiler/Dialect/Flow/Transforms/RematerializeDispatchConstants.cpp
index 8b28d95..fc9d792 100644
--- a/iree/compiler/Dialect/Flow/Transforms/RematerializeDispatchConstants.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/RematerializeDispatchConstants.cpp

@@ -60,7 +60,7 @@
   for (auto &block : dispatchRegionOp.body()) {
     for (auto &op : block) {
       // TODO(b/144530470): replace with tablegen attributes/interfaces.
-      if (isa<xla_hlo::DotOp>(&op) || isa<xla_hlo::ConvOp>(&op)) {
+      if (isa<mhlo::DotOp>(&op) || isa<mhlo::ConvOp>(&op)) {
         // These two generally result in a lot of generated code so we try to
         // keep constants out such that can dedupe more. We may still want to
         // allow some parameters in (shapes/etc).

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
index 4e1a7ef..e08d671 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/fold_compatible_dispatch_regions.mlir

@@ -3,7 +3,7 @@
 func @noFolding(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
   %cst = constant 4 : index
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-    %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
+    %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
     flow.return %1 : tensor<4xf32>
   }
   return %0 : tensor<4xf32>
@@ -12,7 +12,7 @@
 // CHECK-LABEL: func @noFolding
 // CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 4 : index
 // CHECK-NEXT: %0 = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
+// CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
 // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
 // CHECK-NEXT: }
 // CHECK-NEXT: return %0 : tensor<4xf32>
@@ -22,15 +22,15 @@
 func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
   %cst = constant 4 : index
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-    %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
+    %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
     flow.return %1 : tensor<4xf32>
   }
   %2 = flow.dispatch.region[%cst : index](%arg2 = %arg0 : tensor<4xf32>, %arg3 = %0 : tensor<4xf32>) -> tensor<4xf32> {
-    %3 = xla_hlo.subtract %arg3, %arg2 : tensor<4xf32>
+    %3 = mhlo.subtract %arg3, %arg2 : tensor<4xf32>
     flow.return %3 : tensor<4xf32>
   }
   %4 = flow.dispatch.region[%cst : index](%arg4 = %arg0 : tensor<4xf32>, %arg5 = %2 : tensor<4xf32>) -> tensor<4xf32> {
-    %5 = xla_hlo.multiply %arg4, %arg5 : tensor<4xf32>
+    %5 = mhlo.multiply %arg4, %arg5 : tensor<4xf32>
     flow.return %5 : tensor<4xf32>
   }
   return %4 : tensor<4xf32>
@@ -39,9 +39,9 @@
 // CHECK-LABEL: func @elementwiseOps
 // CHECK: %[[WORKLOAD0:.+]] = constant 4
 // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
-// CHECK-NEXT:   %2 = xla_hlo.subtract %1, %arg1 : tensor<4xf32>
-// CHECK-NEXT:   %3 = xla_hlo.multiply %arg1, %2 : tensor<4xf32>
+// CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
+// CHECK-NEXT:   %2 = mhlo.subtract %1, %arg1 : tensor<4xf32>
+// CHECK-NEXT:   %3 = mhlo.multiply %arg1, %2 : tensor<4xf32>
 // CHECK-NEXT:   flow.return %3 : tensor<4xf32>
 // CHECK-NEXT: }
 // CHECK: return %[[R0]] : tensor<4xf32>
@@ -51,17 +51,17 @@
 func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
   %cst = constant 16 : index
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = xla_hlo.add %arg1, %arg1 : tensor<4x4xf32>
+    %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
     flow.return %3 : tensor<4x4xf32>
   }
   %cst_0 = constant 16 : index
   %1 = flow.dispatch.region[%cst_0 : index](%arg1 = %0 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = "xla_hlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+    %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
     flow.return %3 : tensor<4x4xf32>
   }
   %cst_1 = constant 16 : index
   %2 = flow.dispatch.region[%cst_1 : index](%arg1 = %1 : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    %3 = xla_hlo.multiply %arg1, %arg2 : tensor<4x4xf32>
+    %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
     flow.return %3 : tensor<4x4xf32>
   }
   return %2 : tensor<4x4xf32>
@@ -70,17 +70,17 @@
 // CHECK-LABEL: func @interleavedDot
 // CHECK-NEXT: %[[WORKLOAD0:.+]] = constant 16 : index
 // CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:   %3 = xla_hlo.add %arg1, %arg1 : tensor<4x4xf32>
+// CHECK-NEXT:   %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
 // CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
 // CHECK-NEXT: }
 // CHECK-NEXT: %[[WORKLOAD1:.+]] = constant 16 : index
 // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region[%[[WORKLOAD1]] : index](%arg1 = %[[R0]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:   %3 = "xla_hlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT:   %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
 // CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
 // CHECK-NEXT: }
 // CHECK-NEXT: %[[WORKLOAD2:.+]] = constant 16 : index
 // CHECK-NEXT: %[[R2:.+]] = flow.dispatch.region[%[[WORKLOAD2]] : index](%arg1 = %[[R1]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:   %3 = xla_hlo.multiply %arg1, %arg2 : tensor<4x4xf32>
+// CHECK-NEXT:   %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
 // CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
 // CHECK-NEXT: }
 // CHECK-NEXT: return %[[R2]] : tensor<4x4xf32>

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/form_streams.mlir b/iree/compiler/Dialect/Flow/Transforms/test/form_streams.mlir
index c426cc0..32d2def 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/form_streams.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/form_streams.mlir

@@ -6,7 +6,7 @@
   }
   module {
     func @outerOps_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -57,7 +57,7 @@
   }
   module {
     func @interleavedOuterOps_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -115,7 +115,7 @@
   }
   module {
     func @interleavedDot_rgn_dispatch_0(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
       return %0 : tensor<4x4xf32>
     }
   }
@@ -126,7 +126,7 @@
   }
   module {
     func @interleavedDot_rgn_dispatch_1(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-      %0 = "xla_hlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+      %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
       return %0 : tensor<4x4xf32>
     }
   }
@@ -137,7 +137,7 @@
   }
   module {
     func @interleavedDot_rgn_dispatch_2(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-      %0 = xla_hlo.multiply %arg0, %arg1 : tensor<4x4xf32>
+      %0 = mhlo.multiply %arg0, %arg1 : tensor<4x4xf32>
       return %0 : tensor<4x4xf32>
     }
   }
@@ -167,7 +167,7 @@
   }
   module {
     func @caller_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -178,7 +178,7 @@
   }
   module {
     func @caller_rgn_dispatch_1(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.multiply %arg0, %arg1 : tensor<4xf32>
+      %0 = mhlo.multiply %arg0, %arg1 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -206,7 +206,7 @@
   flow.dispatch.entry @callee_rgn_dispatch_0
   module {
     func @callee_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing.mlir b/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing.mlir
index 908cddd..278ef35 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing.mlir

@@ -10,19 +10,19 @@
     %x: tensor<4x256xf32>, %scale: tensor<256xf32>, %offset: tensor<256xf32>,
     %mean: tensor<256xf32>, %variance: tensor<256xf32>)
     -> (tensor<4x256xf32>) {
-  // CHECK-DAG: %[[EPS:.+]] = xla_hlo.constant dense<1.001000e-05> : tensor<f32>
-  // CHECK-DAG: %[[EPS_BCAST:.+]] = "xla_hlo.broadcast_in_dim"(%[[EPS]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<256xf32>
-  // CHECK-DAG: %[[VARIANCE_EPS:.+]] = xla_hlo.add %[[VARIANCE]], %[[EPS_BCAST]] : tensor<256xf32>
-  // CHECK-DAG: %[[STDDEV:.+]] = "xla_hlo.sqrt"(%[[VARIANCE_EPS]]) : (tensor<256xf32>) -> tensor<256xf32>
-  // CHECK-DAG: %[[STDDEV_BCAST:.+]] = "xla_hlo.broadcast_in_dim"(%[[STDDEV]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
-  // CHECK-DAG: %[[SCALE_BCAST:.+]] = "xla_hlo.broadcast_in_dim"(%[[SCALE]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
-  // CHECK-DAG: %[[OFFSET_BCAST:.+]] = "xla_hlo.broadcast_in_dim"(%[[OFFSET]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
-  // CHECK-DAG: %[[MEAN_BCAST:.+]] = "xla_hlo.broadcast_in_dim"(%[[MEAN]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
-  // CHECK-DAG: %[[X_CENTER:.+]] = xla_hlo.subtract %[[X]], %[[MEAN_BCAST]] : tensor<4x256xf32>
-  // CHECK-DAG: %[[X_SCALED:.+]] = xla_hlo.multiply %[[X_CENTER]], %[[SCALE_BCAST]] : tensor<4x256xf32>
-  // CHECK-DAG: %[[X_NORMED:.+]] = xla_hlo.divide %[[X_SCALED]], %[[STDDEV_BCAST]] : tensor<4x256xf32>
-  // CHECK-DAG: %[[RESULT:.+]] = xla_hlo.add %[[X_NORMED]], %[[OFFSET_BCAST]] : tensor<4x256xf32>
-  %0 = "xla_hlo.batch_norm_inference"(%x, %scale, %offset, %mean, %variance)
+  // CHECK-DAG: %[[EPS:.+]] = mhlo.constant dense<1.001000e-05> : tensor<f32>
+  // CHECK-DAG: %[[EPS_BCAST:.+]] = "mhlo.broadcast_in_dim"(%[[EPS]]) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<256xf32>
+  // CHECK-DAG: %[[VARIANCE_EPS:.+]] = mhlo.add %[[VARIANCE]], %[[EPS_BCAST]] : tensor<256xf32>
+  // CHECK-DAG: %[[STDDEV:.+]] = "mhlo.sqrt"(%[[VARIANCE_EPS]]) : (tensor<256xf32>) -> tensor<256xf32>
+  // CHECK-DAG: %[[STDDEV_BCAST:.+]] = "mhlo.broadcast_in_dim"(%[[STDDEV]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
+  // CHECK-DAG: %[[SCALE_BCAST:.+]] = "mhlo.broadcast_in_dim"(%[[SCALE]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
+  // CHECK-DAG: %[[OFFSET_BCAST:.+]] = "mhlo.broadcast_in_dim"(%[[OFFSET]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
+  // CHECK-DAG: %[[MEAN_BCAST:.+]] = "mhlo.broadcast_in_dim"(%[[MEAN]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<256xf32>) -> tensor<4x256xf32>
+  // CHECK-DAG: %[[X_CENTER:.+]] = mhlo.subtract %[[X]], %[[MEAN_BCAST]] : tensor<4x256xf32>
+  // CHECK-DAG: %[[X_SCALED:.+]] = mhlo.multiply %[[X_CENTER]], %[[SCALE_BCAST]] : tensor<4x256xf32>
+  // CHECK-DAG: %[[X_NORMED:.+]] = mhlo.divide %[[X_SCALED]], %[[STDDEV_BCAST]] : tensor<4x256xf32>
+  // CHECK-DAG: %[[RESULT:.+]] = mhlo.add %[[X_NORMED]], %[[OFFSET_BCAST]] : tensor<4x256xf32>
+  %0 = "mhlo.batch_norm_inference"(%x, %scale, %offset, %mean, %variance)
       {epsilon = 1.001000e-05 : f32, feature_index = 1 : i64} :
       (tensor<4x256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>,
         tensor<256xf32>) -> tensor<4x256xf32>
@@ -34,10 +34,10 @@
 
 // CHECK: @depth_conv(%[[ARG0:.+]]: tensor<2x4x5x2xf32>, %[[ARG1:.+]]: tensor<2x2x2x3xf32>)
 func @depth_conv(%arg0: tensor<2x4x5x2xf32>, %arg1: tensor<2x2x2x3xf32>) -> tensor<2x3x4x6xf32> {
-    // CHECK-NOT: xla_hlo.reshape
-    // CHECK: "xla_hlo.convolution"(%[[ARG0]], %[[ARG1]])
-    %0 = "xla_hlo.reshape"(%arg1) : (tensor<2x2x2x3xf32>) -> tensor<2x2x1x6xf32>
-    %1 = "xla_hlo.convolution"(%arg0, %0) {
+    // CHECK-NOT: mhlo.reshape
+    // CHECK: "mhlo.convolution"(%[[ARG0]], %[[ARG1]])
+    %0 = "mhlo.reshape"(%arg1) : (tensor<2x2x2x3xf32>) -> tensor<2x2x1x6xf32>
+    %1 = "mhlo.convolution"(%arg0, %0) {
       batch_group_count = 1 : i64,
       dimension_numbers = {
         input_batch_dimension = 0 : i64,
@@ -60,17 +60,17 @@
 
 // CHECK-LABEL: @reduce_window
 func @reduce_window(%input: tensor<1x16x16x64xf32>) -> tensor<1x8x8x64xf32> {
-  // CHECK: %[[INITVAL:.+]] = xla_hlo.constant dense<0xFF800000> : tensor<f32>
-  %initval = xla_hlo.constant dense<0xFF800000> : tensor<f32>
-  //      CHECK: %[[PAD:.+]] = "xla_hlo.pad"(%{{.+}}, %[[INITVAL]])
+  // CHECK: %[[INITVAL:.+]] = mhlo.constant dense<0xFF800000> : tensor<f32>
+  %initval = mhlo.constant dense<0xFF800000> : tensor<f32>
+  //      CHECK: %[[PAD:.+]] = "mhlo.pad"(%{{.+}}, %[[INITVAL]])
   // CHECK-SAME: edge_padding_high = dense<[0, 1, 1, 0]> : tensor<4xi64>
   // CHECK-SAME: edge_padding_low = dense<[0, 1, 1, 0]> : tensor<4xi64>
-  //      CHECK: "xla_hlo.reduce_window"(%[[PAD]], %[[INITVAL]])
+  //      CHECK: "mhlo.reduce_window"(%[[PAD]], %[[INITVAL]])
   //  CHECK-NOT: padding
-  %0 = "xla_hlo.reduce_window"(%input, %initval) ( {
+  %0 = "mhlo.reduce_window"(%input, %initval) ( {
   ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):   // no predecessors
-    %3 = xla_hlo.maximum %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = mhlo.maximum %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {window_dimensions = dense<[1, 3, 3, 1]> : tensor<4xi64>,
       window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>,
       padding = dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi64>

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir b/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir
index b37d3e3..d31016b 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/hlo_to_hlo_preprocessing_extract_pad_from_conv.mlir

@@ -1,13 +1,13 @@
 // RUN: iree-opt -iree-flow-hlo-to-hlo-preprocessing -iree-extract-pad-from-conv %s | IreeFileCheck %s
 
 // CHECK-LABEL: @conv
-//       CHECK: xla_hlo.pad
+//       CHECK: mhlo.pad
 //  CHECK-SAME: edge_padding_high = dense<[0, 1, 1, 0]>
 //  CHECK-SAME: edge_padding_low = dense<[0, 1, 0, 0]>
-//       CHECK: xla_hlo.convolution
+//       CHECK: mhlo.convolution
 //   CHECK-NOT: padding
 func @conv(%inputs: tensor<1x4x5x2xf32>, %weights: tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32> {
-  %0 = "xla_hlo.convolution"(%inputs, %weights) {
+  %0 = "mhlo.convolution"(%inputs, %weights) {
   batch_group_count = 1 : i64,
   dimension_numbers = {
     input_batch_dimension = 0 : i64,

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions.mlir
index fe41f70..933f3b1 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions.mlir

@@ -14,8 +14,8 @@
   // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
@@ -50,12 +50,12 @@
   // CHECK-NEXT: %0 = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
-  // CHECK-NEXT:   %2 = xla_hlo.subtract %1, %arg1 : tensor<4xf32>
-  %1 = xla_hlo.subtract %0, %arg0 : tensor<4xf32>
-  // CHECK-NEXT:   %3 = xla_hlo.multiply %2, %arg1 : tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %2 = mhlo.subtract %1, %arg1 : tensor<4xf32>
+  %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %3 = mhlo.multiply %2, %arg1 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %3 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return %0 : tensor<4xf32>
@@ -74,22 +74,22 @@
   // CHECK: %[[R0:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  // CHECK-NEXT:   %3 = xla_hlo.add %arg1, %arg1 : tensor<4x4xf32>
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
+  // CHECK-NEXT:   %3 = mhlo.add %arg1, %arg1 : tensor<4x4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
   // CHECK-NEXT: flow.return %3 : tensor<4x4xf32>
   // CHECK-NEXT: }
   // CHECK: %[[R1:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD1]] : index]
   // CHECK-SAME: (%arg1 = %[[R0]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  // CHECK-NEXT:   %3 = "xla_hlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %1 = "xla_hlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  // CHECK-NEXT:   %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
   // CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
   // CHECK-NEXT: }
   // CHECK: %[[R2:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD2]] : index]
   // CHECK-SAME: (%arg1 = %[[R1]] : tensor<4x4xf32>, %arg2 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  // CHECK-NEXT:   %3 = xla_hlo.multiply %arg1, %arg2 : tensor<4x4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4x4xf32>
+  // CHECK-NEXT:   %3 = mhlo.multiply %arg1, %arg2 : tensor<4x4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
   // CHECK-NEXT:   flow.return %3 : tensor<4x4xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return %[[R2]] : tensor<4x4xf32>
@@ -104,12 +104,12 @@
   // CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   %2 = call @callee(%1) : (tensor<4xf32>) -> tensor<4xf32>
   %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
-  // CHECK-NEXT:   %3 = xla_hlo.multiply %2, %arg1 : tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %3 = mhlo.multiply %2, %arg1 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %3 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return %[[R0]] : tensor<4xf32>
@@ -121,8 +121,8 @@
   // CHECK: %[[R0:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.multiply %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.multiply %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK: return %[[R0]] : tensor<4xf32>
@@ -139,11 +139,11 @@
   // CHECK: %[[RESULT:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4x8xf32>, %arg2 = %[[INITIAL]] : tensor<f32>) -> tensor<4xf32>
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg1, %arg2)
-  %1 = "xla_hlo.reduce"(%arg0, %0) ( {
+  // CHECK-NEXT: = "mhlo.reduce"(%arg1, %arg2)
+  %1 = "mhlo.reduce"(%arg0, %0) ( {
   ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
-    %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+    %2 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%2) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
   // CHECK: flow.return
   // CHECK: return %[[RESULT]] : tensor<4xf32>
@@ -162,12 +162,12 @@
   // CHECK: %[[RESULT:.+]]:2 = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>, %arg4 = %[[INITIALA]] : tensor<f32>, %arg5 = %[[INITIALB]] : tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5)
-  %2, %3 = "xla_hlo.reduce"(%arg0, %arg1, %0, %1) ( {
+  // CHECK-NEXT: = "mhlo.reduce"(%arg2, %arg3, %arg4, %arg5)
+  %2, %3 = "mhlo.reduce"(%arg0, %arg1, %0, %1) ( {
   ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
-    %4 = xla_hlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
-    %5 = xla_hlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
-    "xla_hlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
+    %4 = mhlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
+    %5 = mhlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
+    "mhlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
   // CHECK: flow.return
   // CHECK: return %[[RESULT]]#0, %[[RESULT]]#1 : tensor<4xf32>, tensor<4xf32>
@@ -190,10 +190,10 @@
   // CHECK-SAME: %[[CA2:.+]] = %[[A2]] : !shapex.ranked_shape<[?,4]>)
     // Dispatch region should contain captured tie_shapes.
     // CHECK: %[[R1:.+]] = shapex.tie_shape %[[CA0]], %[[CA1]]
-    // CHECK: %[[R2:.+]] = xla_hlo.add %[[R1]], %[[R1]]
+    // CHECK: %[[R2:.+]] = mhlo.add %[[R1]], %[[R1]]
     // CHECK: %[[R3:.+]] = shapex.tie_shape %[[R2]], %[[CA2]]
     // CHECK: flow.return %[[R3]]
-  %1 = xla_hlo.add %0, %0 : tensor<?x4xf32>
+  %1 = mhlo.add %0, %0 : tensor<?x4xf32>
   %2 = shapex.tie_shape %1, %rs1 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
 
   // Lead-out tie_shape should be preserved outside of the dispatch region.
@@ -215,16 +215,16 @@
   // CHECK-SAME: %[[CA1:.+]] = %[[A1]] : !shapex.ranked_shape<[?,4]>
     // Dispatch region should contain captured tie_shapes.
     // CHECK: %[[R1:.+]] = shapex.tie_shape %[[CA0]], %[[CA1]]
-    // CHECK: %[[R2:.+]] = xla_hlo.add %[[R1]], %[[R1]]
+    // CHECK: %[[R2:.+]] = mhlo.add %[[R1]], %[[R1]]
     // CHECK: %[[R3:.+]] = shapex.tie_shape %[[R2]], %[[CA1]]
-    // CHECK: %[[R4:.+]] = xla_hlo.multiply %[[R3]], %[[R1]]
+    // CHECK: %[[R4:.+]] = mhlo.multiply %[[R3]], %[[R1]]
     // CHECK: %[[R5:.+]] = shapex.tie_shape %[[R4]], %[[CA1]]
     // CHECK: flow.return %[[R5]]
 
   %0 = shapex.tie_shape %arg0, %rs0 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-  %1 = xla_hlo.add %0, %0 : tensor<?x4xf32>
+  %1 = mhlo.add %0, %0 : tensor<?x4xf32>
   %2 = shapex.tie_shape %1, %rs0 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
-  %3 = xla_hlo.multiply %2, %0 : tensor<?x4xf32>
+  %3 = mhlo.multiply %2, %0 : tensor<?x4xf32>
   %4 = shapex.tie_shape %3, %rs0 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
 
   // Lead-out tie_shape should be preserved outside of the dispatch region.
@@ -253,7 +253,7 @@
   // Verify that the ties are preserved (relying on outlining tested previously)
     // CHECK-DAG: %[[DTS0:.+]] = shapex.tie_shape {{.+}}: tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
     // CHECK-DAG: %[[DTS1:.+]] = shapex.tie_shape {{.+}}: tensor<4x?xf32>, !shapex.ranked_shape<[4,?]>
-    // CHECK-DAG: %[[DR0:.+]] = "xla_hlo.dot"(%[[DTS0]], %[[DTS1]])
+    // CHECK-DAG: %[[DR0:.+]] = "mhlo.dot"(%[[DTS0]], %[[DTS1]])
     // CHECK-DAG: %[[DTS1:.+]] = shapex.tie_shape %[[DR0]], {{.+}}: tensor<?x?xf32>, !shapex.ranked_shape<[?,?]>
     // CHECK: flow.return %[[DTS1]]
   // CHECK: }
@@ -262,7 +262,7 @@
   // CHECK: return %[[R1]]
   %0 = shapex.tie_shape %arg0, %rs0 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
   %1 = shapex.tie_shape %arg1, %rs1 : tensor<4x?xf32>, !shapex.ranked_shape<[4,?]>
-  %2 = "xla_hlo.dot"(%0, %1) : (tensor<?x4xf32>, tensor<4x?xf32>) -> tensor<?x?xf32>
+  %2 = "mhlo.dot"(%0, %1) : (tensor<?x4xf32>, tensor<4x?xf32>) -> tensor<?x?xf32>
   %3 = shapex.tie_shape %2, %rs2 : tensor<?x?xf32>, !shapex.ranked_shape<[?,?]>
   return %3 : tensor<?x?xf32>
 }

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
index 87de7a2..696bb42 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_hlo.mlir

@@ -6,8 +6,8 @@
   // CHECK-NEXT: %[[R1:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.add %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.add %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return %[[R1]] : tensor<4xf32>
@@ -21,14 +21,14 @@
   // NOTE: Fragile ordering. Workload constants are emitted in order at the
   // top of the block.
   // CHECK: flow.dispatch.region
-  // CHECK:   xla_hlo.add
+  // CHECK:   mhlo.add
   // CHECK: flow.dispatch.region
-  // CHECK:   "xla_hlo.dot"
+  // CHECK:   "mhlo.dot"
   // CHECK: flow.dispatch.region
-  // CHECK:   xla_hlo.multiply
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = "xla_hlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4x4xf32>
+  // CHECK:   mhlo.multiply
+  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
+  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
   return %2 : tensor<4x4xf32>
 }
 
@@ -39,14 +39,14 @@
   // Because these are all the same benefit, initial formation puts them each
   // in their own region.
   // CHECK: flow.dispatch.region
-  // CHECK:   xla_hlo.add
+  // CHECK:   mhlo.add
   // CHECK: flow.dispatch.region
   // CHECK:   call @callee
   // CHECK: flow.dispatch.region
-  // CHECK:   xla_hlo.multiply
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK:   mhlo.multiply
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   %1 = call @callee(%0) : (tensor<4xf32>) -> tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   return %2 : tensor<4xf32>
 }
 
@@ -56,8 +56,8 @@
   // CHECK: %[[R0:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  // CHECK-NEXT:   %1 = xla_hlo.multiply %arg1, %arg1 : tensor<4xf32>
-  %0 = xla_hlo.multiply %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT:   %1 = mhlo.multiply %arg1, %arg1 : tensor<4xf32>
+  %0 = mhlo.multiply %arg0, %arg0 : tensor<4xf32>
   // CHECK-NEXT:   flow.return %1 : tensor<4xf32>
   // CHECK-NEXT: }
   // CHECK: return %[[R0]] : tensor<4xf32>
@@ -71,10 +71,10 @@
   // Because these are all the same benefit, initial formation puts them each
   // in their own region.
   // CHECK: flow.dispatch.region
-  // CHECK:      "xla_hlo.broadcast_in_dim"
-  // CHECK-NEXT: xla_hlo.add
-  %0 = "xla_hlo.broadcast_in_dim"(%arg0) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor<4xf32>) -> tensor<4x16xf32>
-  %1 = xla_hlo.add %0, %0 : tensor<4x16xf32>
+  // CHECK:      "mhlo.broadcast_in_dim"
+  // CHECK-NEXT: mhlo.add
+  %0 = "mhlo.broadcast_in_dim"(%arg0) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor<4xf32>) -> tensor<4x16xf32>
+  %1 = mhlo.add %0, %0 : tensor<4x16xf32>
   return %1 : tensor<4x16xf32>
 }
 
@@ -88,11 +88,11 @@
   // CHECK: %[[RESULT:.+]] = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg1 = %arg0 : tensor<4x8xf32>, %arg2 = %[[INITIAL]] : tensor<f32>) -> tensor<4xf32>
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg1, %arg2)
-  %1 = "xla_hlo.reduce"(%arg0, %0) ( {
+  // CHECK-NEXT: = "mhlo.reduce"(%arg1, %arg2)
+  %1 = "mhlo.reduce"(%arg0, %0) ( {
   ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
-    %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+    %2 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%2) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
   // CHECK: flow.return
   // CHECK: return %[[RESULT]] : tensor<4xf32>
@@ -111,12 +111,12 @@
   // CHECK: %[[RESULT:.+]]:2 = flow.dispatch.region
   // CHECK-SAME: [%[[WORKLOAD0]] : index]
   // CHECK-SAME: (%arg2 = %arg0 : tensor<4x8xf32>, %arg3 = %arg1 : tensor<4x8xf32>, %arg4 = %[[INITIALA]] : tensor<f32>, %arg5 = %[[INITIALB]] : tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
-  // CHECK-NEXT: = "xla_hlo.reduce"(%arg2, %arg3, %arg4, %arg5)
-  %2, %3 = "xla_hlo.reduce"(%arg0, %arg1, %0, %1) ( {
+  // CHECK-NEXT: = "mhlo.reduce"(%arg2, %arg3, %arg4, %arg5)
+  %2, %3 = "mhlo.reduce"(%arg0, %arg1, %0, %1) ( {
   ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
-    %4 = xla_hlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
-    %5 = xla_hlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
-    "xla_hlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
+    %4 = mhlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
+    %5 = mhlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
+    "mhlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
   // CHECK: flow.return
   // CHECK: return %[[RESULT]]#0, %[[RESULT]]#1 : tensor<4xf32>, tensor<4xf32>

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
index 7b4095c..fd7566c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/identify_dispatch_regions2_shapes.mlir

@@ -19,10 +19,10 @@
   // CHECK-SAME: %[[CA1:.+]] = %[[A1]] : !shapex.ranked_shape<[?,4]>)
     // Dispatch region should contain captured tie_shapes.
     // CHECK: %[[R1:.+]] = shapex.tie_shape %[[CA0]], %[[CA1]]
-    // CHECK: %[[R2:.+]] = xla_hlo.add %[[R1]], %[[R1]]
+    // CHECK: %[[R2:.+]] = mhlo.add %[[R1]], %[[R1]]
     // CHECK: %[[R3:.+]] = shapex.tie_shape %[[R2]], %[[CA2]]
     // CHECK: flow.return %[[R3]]
-  %1 = xla_hlo.add %0, %0 : tensor<?x4xf32>
+  %1 = mhlo.add %0, %0 : tensor<?x4xf32>
   %2 = shapex.tie_shape %1, %arg2 : tensor<?x4xf32>, !shapex.ranked_shape<[?,4]>
 
   // Lead-out tie_shape should be preserved outside of the dispatch region.

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/legalize_input_types.mlir b/iree/compiler/Dialect/Flow/Transforms/test/legalize_input_types.mlir
index d545441..0b0ff99 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/legalize_input_types.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/legalize_input_types.mlir

@@ -11,8 +11,8 @@
 // CHECK-LABEL: func @hloConstantI64
 // CHECK-SAME: () -> tensor<1xi32>
 func @hloConstantI64() -> tensor<1xi64> {
-  // CHECK-NEXT: xla_hlo.constant dense<123> : tensor<1xi32>
-  %c123 = xla_hlo.constant dense<123> : tensor<1xi64>
+  // CHECK-NEXT: mhlo.constant dense<123> : tensor<1xi32>
+  %c123 = mhlo.constant dense<123> : tensor<1xi64>
   return %c123 : tensor<1xi64>
 }
 
@@ -92,14 +92,14 @@
 // CHECK-LABEL: func @compareI64
 // CHECK-SAME: (%arg0: tensor<i32>, %arg1: tensor<i32>) -> (i1, tensor<i32>)
 func @compareI64(%arg0 : tensor<i64>, %arg1 : tensor<i64>) -> (i1, tensor<i64>) {
-  // CHECK-NEXT: %0 = "xla_hlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+  // CHECK-NEXT: %0 = "mhlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
   // CHECK-NEXT: %1 = extract_element %0[] : tensor<i1>
   // CHECK-NEXT: cond_br %1, ^bb1(%1, %arg0 : i1, tensor<i32>), ^bb2(%1, %arg1 : i1, tensor<i32>)
   // CHECK-NEXT: ^bb1(%2: i1, %3: tensor<i32>): // pred: ^bb0
   // CHECK-NEXT: return %2, %3 : i1, tensor<i32>
   // CHECK-NEXT: ^bb2(%4: i1, %5: tensor<i32>): // pred: ^bb0
   // CHECK-NEXT: return %4, %5 : i1, tensor<i32>
-  %0 = "xla_hlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
+  %0 = "mhlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
   %1 = extract_element %0[] : tensor<i1>
   cond_br %1, ^bb1(%1, %arg0 : i1, tensor<i64>), ^bb2(%1, %arg1 : i1, tensor<i64>)
 ^bb1(%2 : i1, %3 : tensor<i64>):

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/rematerialize_dispatch_constants.mlir b/iree/compiler/Dialect/Flow/Transforms/test/rematerialize_dispatch_constants.mlir
index d37c65e..45a0195 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/rematerialize_dispatch_constants.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/rematerialize_dispatch_constants.mlir

@@ -8,8 +8,8 @@
   // CHECK: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4x4xf32>, %arg2 = %small : tensor<4x4xf32>) -> tensor<4x4xf32> {
     // CHECK-NEXT: %[[REMAT_SMALL:.+]] = constant dense<1.230000e+00> : tensor<4x4xf32>
-    // CHECK-NEXT: %1 = xla_hlo.add %arg1, %[[REMAT_SMALL]] : tensor<4x4xf32>
-    %3 = xla_hlo.add %arg1, %arg2 : tensor<4x4xf32>
+    // CHECK-NEXT: %1 = mhlo.add %arg1, %[[REMAT_SMALL]] : tensor<4x4xf32>
+    %3 = mhlo.add %arg1, %arg2 : tensor<4x4xf32>
     flow.return %3 : tensor<4x4xf32>
   }
   return %0 : tensor<4x4xf32>
@@ -25,8 +25,8 @@
   // CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<1025xi8>) -> tensor<1025xi8> {
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<1025xi8>, %arg2 = %large : tensor<1025xi8>) -> tensor<1025xi8> {
     // CHECK-NEXT: %[[REMAT_SPLAT:.+]] = constant dense<8> : tensor<1025xi8>
-    // CHECK-NEXT: %1 = xla_hlo.add %arg1, %[[REMAT_SPLAT]] : tensor<1025xi8>
-    %3 = xla_hlo.add %arg1, %arg2 : tensor<1025xi8>
+    // CHECK-NEXT: %1 = mhlo.add %arg1, %[[REMAT_SPLAT]] : tensor<1025xi8>
+    %3 = mhlo.add %arg1, %arg2 : tensor<1025xi8>
     flow.return %3 : tensor<1025xi8>
   }
   return %0 : tensor<1025xi8>
@@ -42,8 +42,8 @@
   %large = constant dense<[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,5
1,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,0]> : tensor<1025xi8>
   // CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<1025xi8>, %arg2 = %[[CST]] : tensor<1025xi8>) -> tensor<1025xi8> {
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<1025xi8>, %arg2 = %large : tensor<1025xi8>) -> tensor<1025xi8> {
-    // CHECK-NEXT: %1 = xla_hlo.add %arg1, %arg2 : tensor<1025xi8>
-    %3 = xla_hlo.add %arg1, %arg2 : tensor<1025xi8>
+    // CHECK-NEXT: %1 = mhlo.add %arg1, %arg2 : tensor<1025xi8>
+    %3 = mhlo.add %arg1, %arg2 : tensor<1025xi8>
     flow.return %3 : tensor<1025xi8>
   }
   return %0 : tensor<1025xi8>
@@ -59,8 +59,8 @@
   %small = constant dense<1.23> : tensor<4x4xf32>
   // CHECK-NEXT: %[[R0:.+]] = flow.dispatch.region[%[[WORKLOAD0]] : index](%arg1 = %arg0 : tensor<4x4xf32>, %arg2 = %[[SMALL]] : tensor<4x4xf32>) -> tensor<4x4xf32> {
   %0 = flow.dispatch.region[%cst : index](%arg1 = %arg0 : tensor<4x4xf32>, %arg2 = %small : tensor<4x4xf32>) -> tensor<4x4xf32> {
-    // CHECK-NEXT: %1 = "xla_hlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-    %3 = "xla_hlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+    // CHECK-NEXT: %1 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+    %3 = "mhlo.dot"(%arg1, %arg2) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
     flow.return %3 : tensor<4x4xf32>
   }
   return %0 : tensor<4x4xf32>

diff --git a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
index 4aecd2a..7d0ab56 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/transformation.mlir

@@ -9,7 +9,7 @@
 // -----
 
 func @simpleMath(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   return %0 : tensor<4xf32>
 }
 
@@ -17,7 +17,7 @@
 // CHECK-NEXT:   flow.dispatch.entry @simpleMath_ex_dispatch_0
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @simpleMath_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:       %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
 // CHECK-NEXT:       return %0 : tensor<4xf32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
@@ -63,9 +63,9 @@
 // -----
 
 func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = xla_hlo.subtract %0, %arg0 : tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
+  %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   return %2 : tensor<4xf32>
 }
 
@@ -73,9 +73,9 @@
 // CHECK-NEXT:   flow.dispatch.entry @hloElementwiseOps_ex_dispatch_0
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @hloElementwiseOps_ex_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-// CHECK-NEXT:       %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %1 = xla_hlo.subtract %0, %arg0 : tensor<4xf32>
-// CHECK-NEXT:       %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
+// CHECK-NEXT:       %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
+// CHECK-NEXT:       %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
 // CHECK-NEXT:       return %2 : tensor<4xf32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
@@ -92,9 +92,9 @@
 // -----
 
 func @interleavedDot(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
-  %1 = "xla_hlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4x4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
+  %1 = "mhlo.dot"(%0, %arg0) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4x4xf32>
   return %2 : tensor<4x4xf32>
 }
 
@@ -102,7 +102,7 @@
 // CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_0
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @interleavedDot_ex_dispatch_0(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = xla_hlo.add %arg0, %arg0 : tensor<4x4xf32>
+// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg0 : tensor<4x4xf32>
 // CHECK-NEXT:       return %0 : tensor<4x4xf32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
@@ -111,7 +111,7 @@
 // CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_1
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @interleavedDot_ex_dispatch_1(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = "xla_hlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT:       %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
 // CHECK-NEXT:       return %0 : tensor<4x4xf32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
@@ -120,7 +120,7 @@
 // CHECK-NEXT:   flow.dispatch.entry @interleavedDot_ex_dispatch_2
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @interleavedDot_ex_dispatch_2(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32> {
-// CHECK-NEXT:       %0 = xla_hlo.multiply %arg0, %arg1 : tensor<4x4xf32>
+// CHECK-NEXT:       %0 = mhlo.multiply %arg0, %arg1 : tensor<4x4xf32>
 // CHECK-NEXT:       return %0 : tensor<4x4xf32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
@@ -140,10 +140,10 @@
 
 func @reduction(%arg0 : tensor<4x8xf32>) -> tensor<4xf32> {
   %0 = constant dense<0.0> : tensor<f32>
-  %1 = "xla_hlo.reduce"(%arg0, %0) ( {
+  %1 = "mhlo.reduce"(%arg0, %0) ( {
   ^bb0(%arg1 : tensor<f32>, %arg2 : tensor<f32>):
-    %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+    %2 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%2) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
   return %1 : tensor<4xf32>
 }
@@ -153,10 +153,10 @@
 //  CHECK-NEXT:   module {
 //  CHECK-NEXT:     func @reduction_ex_dispatch_0(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
 //  CHECK-NEXT:       %cst = constant dense<0.000000e+00> : tensor<f32>
-//  CHECK-NEXT:       %0 = "xla_hlo.reduce"(%arg0, %cst) ( {
+//  CHECK-NEXT:       %0 = "mhlo.reduce"(%arg0, %cst) ( {
 //  CHECK-NEXT:       ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
-//  CHECK-NEXT:         %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-//  CHECK-NEXT:         "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+//  CHECK-NEXT:         %1 = mhlo.add %arg1, %arg2 : tensor<f32>
+//  CHECK-NEXT:         "mhlo.return"(%1) : (tensor<f32>) -> ()
 //  CHECK-NEXT:       }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
 //  CHECK-NEXT:       return %0 : tensor<4xf32>
 //  CHECK-NEXT:     }
@@ -174,8 +174,8 @@
 // -----
 
 func @dynamicUpdateSlice(%operand : tensor<2x4xi32>, %update : tensor<1x1xi32>, %indices_0 : tensor<i64>, %indices_1 : tensor<i64>) -> tensor<2x4xi32> {
-  %0 = "xla_hlo.dynamic-update-slice"(%operand, %update, %indices_0, %indices_1) : (tensor<2x4xi32>, tensor<1x1xi32>, tensor<i64>, tensor<i64>) -> tensor<2x4xi32>
-  %1 = xla_hlo.add %operand, %0 : tensor<2x4xi32>
+  %0 = "mhlo.dynamic-update-slice"(%operand, %update, %indices_0, %indices_1) : (tensor<2x4xi32>, tensor<1x1xi32>, tensor<i64>, tensor<i64>) -> tensor<2x4xi32>
+  %1 = mhlo.add %operand, %0 : tensor<2x4xi32>
   return %1 : tensor<2x4xi32>
 }
 
@@ -183,7 +183,7 @@
 // CHECK-NEXT: flow.dispatch.entry @dynamicUpdateSlice_ex_dispatch_0
 // CHECK-NEXT:   module {
 // CHECK-NEXT:     func @dynamicUpdateSlice_ex_dispatch_0(%arg0: tensor<2x4xi32>, %arg1: tensor<2x4xi32>) -> tensor<2x4xi32> {
-// CHECK-NEXT:       %0 = xla_hlo.add %arg0, %arg1 : tensor<2x4xi32>
+// CHECK-NEXT:       %0 = mhlo.add %arg0, %arg1 : tensor<2x4xi32>
 // CHECK-NEXT:       return %0 : tensor<2x4xi32>
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }

diff --git a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
index c7da796..1d89486 100644
--- a/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp
+++ b/iree/compiler/Dialect/Flow/Utils/DispatchUtils.cpp

@@ -32,7 +32,7 @@
   // TODO(benvanik): replace with op dispatchability interface to allow dialects
   // to opt into dispatch.
   auto dialectNamespace = op->getDialect()->getNamespace();
-  return dialectNamespace == xla_hlo::XlaHloDialect::getDialectNamespace() ||
+  return dialectNamespace == mhlo::XlaHloDialect::getDialectNamespace() ||
          dialectNamespace == mlir::StandardOpsDialect::getDialectNamespace() ||
          dialectNamespace == FlowDialect::getDialectNamespace() ||
          dialectNamespace == ShapeDialect::getDialectNamespace();

diff --git a/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir b/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
index db2892e..048158a 100644
--- a/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir
+++ b/iree/compiler/Dialect/HAL/IR/test/executable_ops.mlir

@@ -7,8 +7,8 @@
   %c16 = constant 16 : index
   // CHECK: %[[ARG0:.+]] = hal.interface.load.tensor @interface::@s0b0, offset = %c16 : tensor<4xf32>
   %arg0 = hal.interface.load.tensor @interface::@s0b0, offset=%c16 : tensor<4xf32>
-  // CHECK-NEXT: %[[TEMP:.+]] = xla_hlo.add %[[ARG0]], %[[ARG0]]
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  // CHECK-NEXT: %[[TEMP:.+]] = mhlo.add %[[ARG0]], %[[ARG0]]
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   %c32 = constant 32 : index
   // CHECK: hal.interface.store.tensor %[[TEMP]], @interface::@s0b1, offset = %c32 : tensor<4xf32>
   hal.interface.store.tensor %0, @interface::@s0b1, offset=%c32 : tensor<4xf32>

diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/test/binaryop_test.mlir b/iree/compiler/Dialect/HAL/Target/LLVM/test/binaryop_test.mlir
index 917b8a7..3c8675a 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/test/binaryop_test.mlir
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/test/binaryop_test.mlir

@@ -5,7 +5,7 @@
   }
   module {
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }

diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/test/matmul_op.mlir b/iree/compiler/Dialect/HAL/Target/LLVM/test/matmul_op.mlir
index 4775e58..9760820 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/test/matmul_op.mlir
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/test/matmul_op.mlir

@@ -5,7 +5,7 @@
   }
   module {
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4x3xf32>, %arg1: tensor<3x4xf32>) -> tensor<4x4xf32> {
-      %0 = "xla_hlo.dot"(%arg0, %arg1) : (tensor<4x3xf32>, tensor<3x4xf32>) -> tensor<4x4xf32>
+      %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x3xf32>, tensor<3x4xf32>) -> tensor<4x4xf32>
       return %0 : tensor<4x4xf32>
     }
   }

diff --git a/iree/compiler/Dialect/HAL/Target/VMLA/test/smoketest.mlir b/iree/compiler/Dialect/HAL/Target/VMLA/test/smoketest.mlir
index 5c3b26d..aea7465 100644
--- a/iree/compiler/Dialect/HAL/Target/VMLA/test/smoketest.mlir
+++ b/iree/compiler/Dialect/HAL/Target/VMLA/test/smoketest.mlir

@@ -6,7 +6,7 @@
   }
   module {
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -48,7 +48,7 @@
     func @entry(%arg0: tensor<4x?xf32>, %arg1 : index) -> tensor<4x?xf32> {
       %0 = shapex.make_ranked_shape %arg1 : (index) -> !shapex.ranked_shape<[4,?]>
       %1 = shapex.tie_shape %arg0, %0 : tensor<4x?xf32>, !shapex.ranked_shape<[4,?]>
-      %2 = xla_hlo.add %1, %1 : tensor<4x?xf32>
+      %2 = mhlo.add %1, %1 : tensor<4x?xf32>
       %3 = shapex.tie_shape %2, %0 : tensor<4x?xf32>, !shapex.ranked_shape<[4,?]>
       return %3 : tensor<4x?xf32>
     }
@@ -87,10 +87,10 @@
   module {
     func @reduction_ex_dispatch_0(%arg0: tensor<4x8xf32>) -> tensor<4xf32> {
       %cst = constant dense<0.000000e+00> : tensor<f32>
-      %0 = "xla_hlo.reduce"(%arg0, %cst) ( {
+      %0 = "mhlo.reduce"(%arg0, %cst) ( {
       ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):
-        %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-        "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+        %1 = mhlo.add %arg1, %arg2 : tensor<f32>
+        "mhlo.return"(%1) : (tensor<f32>) -> ()
       }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
       return %0 : tensor<4xf32>
     }

diff --git a/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir b/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir
index 269ce14..aa5e5e5 100644
--- a/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir
+++ b/iree/compiler/Dialect/HAL/Target/test/smoketest.mlir

@@ -6,7 +6,7 @@
   }
   module {
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }
@@ -30,10 +30,10 @@
   }
   module {
     func @reduction_rgn_reduce_0_dim_0_entry(%arg0 : tensor<4x8xf32>, %arg1 : tensor<f32>) -> tensor<4xf32> {
-      %0 = "xla_hlo.reduce"(%arg0, %arg1) ( {
+      %0 = "mhlo.reduce"(%arg0, %arg1) ( {
       ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
-        %1 = xla_hlo.add %arg3, %arg4 : tensor<f32>
-        "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+        %1 = mhlo.add %arg3, %arg4 : tensor<f32>
+        "mhlo.return"(%1) : (tensor<f32>) -> ()
       }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
       return %0 : tensor<4xf32>
     }

diff --git a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
index f3fa94f..9c7193e 100644
--- a/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir
+++ b/iree/compiler/Dialect/HAL/Transforms/test/materialize_interfaces.mlir

@@ -26,7 +26,7 @@
     // CHECK-NEXT: }
     // CHECK-NEXT: func @simpleMath_rgn_dispatch_0_impl
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
     // CHECK: hal.interface @legacy_io attributes {sym_visibility = "private"}
@@ -64,7 +64,7 @@
       %0 = shapex.make_ranked_shape %arg1 : (index) -> !shapex.ranked_shape<[?,7,10]>
       %1 = shapex.make_ranked_shape %arg2 : (index) -> !shapex.ranked_shape<[7,?,10]>
       %2 = shapex.tie_shape %arg0, %0 : tensor<?x7x10xf32>, !shapex.ranked_shape<[?,7,10]>
-      %3 = "xla_hlo.transpose"(%2) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<?x7x10xf32>) -> tensor<7x?x10xf32>
+      %3 = "mhlo.transpose"(%2) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<?x7x10xf32>) -> tensor<7x?x10xf32>
       %4 = shapex.tie_shape %3, %1 : tensor<7x?x10xf32>, !shapex.ranked_shape<[7,?,10]>
       return %4 : tensor<7x?x10xf32>
     }

diff --git a/iree/compiler/Dialect/IREE/IR/IREEOps.td b/iree/compiler/Dialect/IREE/IR/IREEOps.td
index b490f96..a2e2229 100644
--- a/iree/compiler/Dialect/IREE/IR/IREEOps.td
+++ b/iree/compiler/Dialect/IREE/IR/IREEOps.td

@@ -85,7 +85,7 @@
 
     ```mlir
     %c = iree.dynamic_shape_constant tensor<2x2xf32> -> tensor<?x?xf32>
-    %res = "xla_hlo.abs"(%c) : (tensor<?x?xf32>) -> tensor<?x?xf32>
+    %res = "mhlo.abs"(%c) : (tensor<?x?xf32>) -> tensor<?x?xf32>
     ```
   }];
   let arguments = (ins ElementsAttr:$value);

diff --git a/iree/compiler/Dialect/Shape/IR/ShapeBase.td b/iree/compiler/Dialect/Shape/IR/ShapeBase.td
index 654e8cf..17bb129 100644
--- a/iree/compiler/Dialect/Shape/IR/ShapeBase.td
+++ b/iree/compiler/Dialect/Shape/IR/ShapeBase.td

@@ -43,7 +43,7 @@
 // here. There seem to be two primary motivators right now, both of which are
 // not obviously ideal long-term:
 //
-// 1. xla_hlo dialect uses i64 in many places that index should be used.
+// 1. mhlo dialect uses i64 in many places that index should be used.
 //    This is understood to be a bug.
 // 2. VMLA gets these values as i32 directly.
 //

diff --git a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp
index beff84a..8d37751 100644
--- a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp
+++ b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.cpp

@@ -30,7 +30,7 @@
 using namespace mlir::iree_compiler::Shape;
 
 namespace mlir {
-namespace xla_hlo {
+namespace mhlo {
 namespace {
 
 template <typename HloOp>
@@ -338,10 +338,9 @@
       rewriteShapexRankedBroadcastInDim);
   b.insertOpRankedShapeBuilder<ReduceOp>(rewriteReduce);
   b.insertOpRankedShapeBuilder<TransposeOp>(rewriteTranspose);
-  b.insertOpRankedShapeBuilder<xla_hlo::DotGeneralOp>(rewriteDotGeneral);
-  b.insertOpRankedShapeBuilder<xla_hlo::DynamicReshapeOp>(
-      rewriteDynamicReshape);
+  b.insertOpRankedShapeBuilder<mhlo::DotGeneralOp>(rewriteDotGeneral);
+  b.insertOpRankedShapeBuilder<mhlo::DynamicReshapeOp>(rewriteDynamicReshape);
 }
 
-}  // namespace xla_hlo
+}  // namespace mhlo
 }  // namespace mlir

diff --git a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.h b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.h
index 91f7f15..ecea2b7 100644
--- a/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.h
+++ b/iree/compiler/Dialect/Shape/Plugins/XLA/XlaHloShapeBuilder.h

@@ -18,14 +18,14 @@
 #include "iree/compiler/Dialect/Shape/IR/ShapeInterface.h"
 
 namespace mlir {
-namespace xla_hlo {
+namespace mhlo {
 
 // Creates a custom op shape builder for XLA-HLO ops that are not otherwise
 // supported through traits or other declarative means.
 void populateXlaHloCustomOpShapeBuilder(
     iree_compiler::Shape::CustomOpShapeBuilderList &builders);
 
-}  // namespace xla_hlo
+}  // namespace mhlo
 }  // namespace mlir
 
 #endif  // IREE_COMPILER_DIALECT_SHAPE_IR_XLAHLOSHAPEBUILDER_H_

diff --git a/iree/compiler/Dialect/Shape/Plugins/XLA/test/custom_ops.mlir b/iree/compiler/Dialect/Shape/Plugins/XLA/test/custom_ops.mlir
index 59882e5..4a03bf9 100644
--- a/iree/compiler/Dialect/Shape/Plugins/XLA/test/custom_ops.mlir
+++ b/iree/compiler/Dialect/Shape/Plugins/XLA/test/custom_ops.mlir

@@ -3,9 +3,9 @@
 // CHECK-LABEL: @transpose
 func @transpose(%arg0: tensor<?x7x10xf32>, %arg1: !shapex.ranked_shape<[?,7,10]>) -> (tensor<7x?x10xf32>, !shapex.ranked_shape<[7,?,10]>) {
   %tied = shapex.tie_shape %arg0, %arg1 : tensor<?x7x10xf32>, !shapex.ranked_shape<[?,7,10]>
-  %0 = "xla_hlo.transpose"(%tied) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} :
+  %0 = "mhlo.transpose"(%tied) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} :
       (tensor<?x7x10xf32>) -> tensor<7x?x10xf32>
-  // CHECK-DAG: %[[RESULT:.+]] = "xla_hlo.transpose"
+  // CHECK-DAG: %[[RESULT:.+]] = "mhlo.transpose"
   // CHECK-DAG: %[[DIM:.+]] = shapex.ranked_dim %arg1[0]
   // CHECK-DAG: %[[SHAPE:.+]] = shapex.make_ranked_shape %[[DIM]]
   %1 = shapex.get_ranked_shape %0 : tensor<7x?x10xf32> -> !shapex.ranked_shape<[7,?,10]>
@@ -26,7 +26,7 @@
   // CHECK-DAG: %[[EXTENT2:.+]] = shapex.ranked_dim %arg3[2]
   // CHECK-DAG: %[[SHAPE:.+]] = shapex.make_ranked_shape %[[EXTENT0]], %[[EXTENT1]], %[[EXTENT2]]
   // CHECK-DAG: return %[[SHAPE]]
-  %0 = "xla_hlo.dot_general"(%tie0, %tie1) { dot_dimension_numbers = {
+  %0 = "mhlo.dot_general"(%tie0, %tie1) { dot_dimension_numbers = {
     lhs_batching_dimensions = dense<0> : tensor<1xi64>,
     lhs_contracting_dimensions = dense<2> : tensor<1xi64>,
     rhs_batching_dimensions = dense<0> : tensor<1xi64>,
@@ -42,7 +42,7 @@
 func @dynamic_reshape(%arg0: tensor<?xf32>, %arg1: tensor<2xindex>) -> !shapex.ranked_shape<[?,?]> {
   // CHECK-DAG: %[[SHAPE:.+]] = "shapex.from_extent_tensor"(%arg1)
   // CHECK-DAG: return %[[SHAPE]]
-  %0 = "xla_hlo.dynamic_reshape"(%arg0, %arg1) : (tensor<?xf32>, tensor<2xindex>) -> tensor<?x?xf32>
+  %0 = "mhlo.dynamic_reshape"(%arg0, %arg1) : (tensor<?xf32>, tensor<2xindex>) -> tensor<?x?xf32>
   %1 = shapex.get_ranked_shape %0 : tensor<?x?xf32> -> !shapex.ranked_shape<[?,?]>
   return %1 : !shapex.ranked_shape<[?,?]>
 }

diff --git a/iree/compiler/Dialect/Shape/Transforms/ConvertHLOToShapeDialectPass.cpp b/iree/compiler/Dialect/Shape/Transforms/ConvertHLOToShapeDialectPass.cpp
index 04eab30..c184492 100644
--- a/iree/compiler/Dialect/Shape/Transforms/ConvertHLOToShapeDialectPass.cpp
+++ b/iree/compiler/Dialect/Shape/Transforms/ConvertHLOToShapeDialectPass.cpp

@@ -32,12 +32,12 @@
 namespace {
 
 class ConvertDynamicBroadcastInDim
-    : public OpConversionPattern<xla_hlo::DynamicBroadcastInDimOp> {
+    : public OpConversionPattern<mhlo::DynamicBroadcastInDimOp> {
   using OpConversionPattern::OpConversionPattern;
   LogicalResult matchAndRewrite(
-      xla_hlo::DynamicBroadcastInDimOp op, ArrayRef<Value> operands,
+      mhlo::DynamicBroadcastInDimOp op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
-    xla_hlo::DynamicBroadcastInDimOp::Adaptor adapter(operands);
+    mhlo::DynamicBroadcastInDimOp::Adaptor adapter(operands);
     Value rankedShape = rewriter.create<Shape::FromExtentTensorOp>(
         op.getLoc(), adapter.output_dimensions());
     rewriter.replaceOpWithNewOp<Shape::RankedBroadcastInDimOp>(
@@ -55,9 +55,9 @@
 
     conversionTarget.addLegalDialect<ShapeDialect>();
     conversionTarget.addLegalDialect<StandardOpsDialect>();
-    conversionTarget.addLegalDialect<xla_hlo::XlaHloDialect>();
+    conversionTarget.addLegalDialect<mhlo::XlaHloDialect>();
 
-    conversionTarget.addIllegalOp<xla_hlo::DynamicBroadcastInDimOp>();
+    conversionTarget.addIllegalOp<mhlo::DynamicBroadcastInDimOp>();
     conversionPatterns.insert<ConvertDynamicBroadcastInDim>(&getContext());
 
     if (failed(applyPartialConversion(getFunction(), conversionTarget,

diff --git a/iree/compiler/Dialect/Shape/Transforms/MaterializeShapeCalculations.cpp b/iree/compiler/Dialect/Shape/Transforms/MaterializeShapeCalculations.cpp
index fff9061..2053454 100644
--- a/iree/compiler/Dialect/Shape/Transforms/MaterializeShapeCalculations.cpp
+++ b/iree/compiler/Dialect/Shape/Transforms/MaterializeShapeCalculations.cpp

@@ -47,7 +47,7 @@
 const CustomOpShapeBuilderList *getCustomOpShapeBuilder() {
   static CustomOpShapeBuilderList globalBuilders = ([]() {
     CustomOpShapeBuilderList builders;
-    xla_hlo::populateXlaHloCustomOpShapeBuilder(builders);
+    mhlo::populateXlaHloCustomOpShapeBuilder(builders);
     IREE::VMLA::populateVMLACustomOpShapeBuilder(builders);
     return builders;
   })();

diff --git a/iree/compiler/Dialect/Shape/Transforms/test/convert_hlo_to_shape_dialect.mlir b/iree/compiler/Dialect/Shape/Transforms/test/convert_hlo_to_shape_dialect.mlir
index ac5b13b..1ddddcb 100644
--- a/iree/compiler/Dialect/Shape/Transforms/test/convert_hlo_to_shape_dialect.mlir
+++ b/iree/compiler/Dialect/Shape/Transforms/test/convert_hlo_to_shape_dialect.mlir

@@ -5,6 +5,6 @@
   // CHECK-DAG: %[[SHAPE:.+]] = "shapex.from_extent_tensor"(%arg1) : (tensor<2xindex>) -> !shapex.ranked_shape<[?,?]>
   // CHECK-DAG: %[[BROADCASTED:.+]] = "shapex.ranked_broadcast_in_dim"(%arg0, %0) {broadcast_dimensions = dense<1> : tensor<1xi64>}
   // CHECK-DAG: return %[[BROADCASTED]]
-  %0 = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %arg1) {broadcast_dimensions = dense<[1]> : tensor<1xi64>}: (tensor<?xf32>, tensor<2xindex>) -> tensor<?x?xf32>
+  %0 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %arg1) {broadcast_dimensions = dense<[1]> : tensor<1xi64>}: (tensor<?xf32>, tensor<2xindex>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }

diff --git a/iree/compiler/Dialect/Shape/Transforms/test/materialize_shape_calculations.mlir b/iree/compiler/Dialect/Shape/Transforms/test/materialize_shape_calculations.mlir
index f830f3d..50a158d 100644
--- a/iree/compiler/Dialect/Shape/Transforms/test/materialize_shape_calculations.mlir
+++ b/iree/compiler/Dialect/Shape/Transforms/test/materialize_shape_calculations.mlir

@@ -7,10 +7,10 @@
   // CHECK-NOT: shapex.tie_shape
   // CHECK-NOT: shapex.get_ranked_shape
   %0 = shapex.tie_shape %arg0, %arg1 : tensor<?x2xf32>, !shapex.ranked_shape<[?,2]>
-  // CHECK: %[[ABS:.+]] = "xla_hlo.abs"(%[[T]])
+  // CHECK: %[[ABS:.+]] = "mhlo.abs"(%[[T]])
   // The only thing special about abs is that we have a compile time shape
   // calculation for it.
-  %1 = "xla_hlo.abs"(%0) : (tensor<?x2xf32>) -> (tensor<?x2xf32>)
+  %1 = "mhlo.abs"(%0) : (tensor<?x2xf32>) -> (tensor<?x2xf32>)
   %2 = shapex.get_ranked_shape %1 : tensor<?x2xf32> -> !shapex.ranked_shape<[?,2]>
   // CHECK: return %[[ABS]], %[[SHAPE]]
   return %1, %2 : tensor<?x2xf32>, !shapex.ranked_shape<[?,2]>

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HALToVMLA/test/interface_ops.mlir b/iree/compiler/Dialect/VMLA/Conversion/HALToVMLA/test/interface_ops.mlir
index 428f374..f914fec 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HALToVMLA/test/interface_ops.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HALToVMLA/test/interface_ops.mlir

@@ -13,7 +13,7 @@
   %0 = hal.interface.load.tensor @legacy_io::@arg0, offset = %c0 : tensor<f32>
   // CHECK-NEXT: %[[TEMP:.+]] = vmla.buffer.alloc byte_length = %[[C4]] : !vmla.buffer
   // CHECK-NEXT: vmla.add %[[ARG0]], %[[CST1]], out %[[TEMP]] : f32
-  %1 = xla_hlo.add %0, %cst : tensor<f32>
+  %1 = mhlo.add %0, %cst : tensor<f32>
   // CHECK-NEXT: %[[SET0BINDING1:.+]] = vmla.interface.binding %[[INTERFACE]] {binding = 1 : i32, set = 0 : i32} : !vmla.buffer
   // CHECK-NEXT: vmla.buffer.copy %[[TEMP]][%[[C0]]], out %[[SET0BINDING1]][%[[C0]]], byte_length = %[[C4]]
   hal.interface.store.tensor %1, @legacy_io::@ret0, offset = %c0 : tensor<f32>
@@ -21,7 +21,7 @@
 }
 func @inc_rgn_dispatch_0_impl(%arg0: tensor<f32>) -> tensor<f32> attributes {iree.module.export, sym_visibility = "private"} {
   %cst = constant dense<1.000000e+00> : tensor<f32>
-  %0 = xla_hlo.add %arg0, %cst : tensor<f32>
+  %0 = mhlo.add %arg0, %cst : tensor<f32>
   return %0 : tensor<f32>
 }
 hal.interface @legacy_io attributes {sym_visibility = "private"} {

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertConvOps.cpp b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertConvOps.cpp
index ef07581..db4f63f 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertConvOps.cpp
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertConvOps.cpp

@@ -38,13 +38,13 @@
 
 namespace {
 
-struct VMLAConvOpConverter : public OpConversionPattern<xla_hlo::ConvOp> {
+struct VMLAConvOpConverter : public OpConversionPattern<mhlo::ConvOp> {
   using OpConversionPattern::OpConversionPattern;
   VMLAConvOpConverter(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ConvOp op, ArrayRef<Value> operands,
+      mhlo::ConvOp op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     if (op.dimension_numbers()) {
       const auto dimensionNumbers = op.dimension_numbers();

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertHLOToVMLA.cpp b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertHLOToVMLA.cpp
index 496c60a..5fba75f 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertHLOToVMLA.cpp
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertHLOToVMLA.cpp

@@ -60,7 +60,7 @@
   LogicalResult matchAndRewrite(
       SRC srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
-    // xla_hlo::DynamicReshape has multiple operands, so we cannot just say
+    // mhlo::DynamicReshape has multiple operands, so we cannot just say
     // `getOperand()`. But `getOperand(0)` doesn't work for the other
     // single-operand ops. So use the raw Operation to get the operand.
     if (srcOp.getOperation()->getOperand(0).hasOneUse()) {
@@ -82,7 +82,7 @@
 // Converts a shapex.ranked_broadcast_in_dim op to either a broadcast or a tile
 // depending on the input shape.
 //
-// We assume that xla_hlo.broadcast_in_dim and xla_hlo.dynamic_broadcast_in_dim
+// We assume that mhlo.broadcast_in_dim and mhlo.dynamic_broadcast_in_dim
 // have been legalized into that op.
 //
 // Note that shapex.ranked_broadcast_in_dim is not strictly speaking an HLO op,
@@ -143,9 +143,9 @@
   TypeConverter &typeConverter;
 };
 
-struct CanonicalizeBroadcastOp : public OpRewritePattern<xla_hlo::BroadcastOp> {
+struct CanonicalizeBroadcastOp : public OpRewritePattern<mhlo::BroadcastOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::BroadcastOp op,
+  LogicalResult matchAndRewrite(mhlo::BroadcastOp op,
                                 PatternRewriter &rewriter) const override {
     SmallVector<int64_t, 6> broadcastDimensions;
     RankedTensorType inputType =
@@ -163,7 +163,7 @@
                                         rewriter.getIntegerType(64));
       return DenseIntElementsAttr::get(type, integers);
     };
-    rewriter.replaceOpWithNewOp<xla_hlo::BroadcastInDimOp>(
+    rewriter.replaceOpWithNewOp<mhlo::BroadcastInDimOp>(
         op, op.getType(), op.getOperand(),
         make1DElementsAttr(broadcastDimensions));
     return success();
@@ -172,12 +172,12 @@
 
 // Converts a concat into a set of copies into the destination buffer.
 struct ConcatenateOpConversion
-    : public OpConversionPattern<xla_hlo::ConcatenateOp> {
+    : public OpConversionPattern<mhlo::ConcatenateOp> {
   ConcatenateOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ConcatenateOp srcOp, ArrayRef<Value> operands,
+      mhlo::ConcatenateOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     auto zero = rewriter.createOrFold<mlir::ConstantIndexOp>(srcOp.getLoc(), 0);
 
@@ -223,16 +223,16 @@
 // Lowers a subset of gathers along axis 0 that are really just a slice and
 // reshape.
 // TODO(ataei): Move this to vmla.gather lowering.
-struct GatherOpConversion : public OpConversionPattern<xla_hlo::GatherOp> {
+struct GatherOpConversion : public OpConversionPattern<mhlo::GatherOp> {
   GatherOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   // TODO(gcmn): This only handles a minimal number of cases. When XLA
   // redefines gather to be simpler, lower it properly.
   LogicalResult matchAndRewrite(
-      xla_hlo::GatherOp gatherOp, ArrayRef<Value> operandValues,
+      mhlo::GatherOp gatherOp, ArrayRef<Value> operandValues,
       ConversionPatternRewriter &rewriter) const override {
-    xla_hlo::GatherOp::Adaptor operands(operandValues);
+    mhlo::GatherOp::Adaptor operands(operandValues);
     auto dimension_numbers = gatherOp.dimension_numbers();
     if (dimension_numbers.index_vector_dim().getValue().getSExtValue() != 0) {
       gatherOp.emitRemark()
@@ -347,12 +347,12 @@
 };
 
 // Converts a static slice op to a copy (if the source must be preserved).
-struct SliceOpConversion : public OpConversionPattern<xla_hlo::SliceOp> {
+struct SliceOpConversion : public OpConversionPattern<mhlo::SliceOp> {
   SliceOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::SliceOp srcOp, ArrayRef<Value> operands,
+      mhlo::SliceOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     auto isNotOne = [](APInt stride) { return stride != 1; };
     if (llvm::any_of(srcOp.strides(), isNotOne)) {
@@ -400,14 +400,14 @@
 
 // Converts a dynamic slice op to a copy (if the source must be preserved).
 struct DynamicSliceOpConversion
-    : public OpConversionPattern<xla_hlo::DynamicSliceOp> {
+    : public OpConversionPattern<mhlo::DynamicSliceOp> {
   DynamicSliceOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::DynamicSliceOp srcOp, ArrayRef<Value> rawOperands,
+      mhlo::DynamicSliceOp srcOp, ArrayRef<Value> rawOperands,
       ConversionPatternRewriter &rewriter) const override {
-    xla_hlo::DynamicSliceOp::Adaptor operands(rawOperands);
+    mhlo::DynamicSliceOp::Adaptor operands(rawOperands);
     // TODO(benvanik): if the source is only used by this op then replace with
     // a vmla.buffer.view op.
 
@@ -448,12 +448,12 @@
   TypeConverter &typeConverter;
 };
 
-struct CompareOpConversion : public OpConversionPattern<xla_hlo::CompareOp> {
+struct CompareOpConversion : public OpConversionPattern<mhlo::CompareOp> {
   CompareOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::CompareOp srcOp, ArrayRef<Value> rawOperands,
+      mhlo::CompareOp srcOp, ArrayRef<Value> rawOperands,
       ConversionPatternRewriter &rewriter) const override {
     auto linputType = srcOp.lhs().getType().dyn_cast<ShapedType>();
     auto rinputType = srcOp.rhs().getType().dyn_cast<ShapedType>();
@@ -509,21 +509,21 @@
   TypeConverter &typeConverter;
 };
 
-struct ConvertOpConversion : public OpConversionPattern<xla_hlo::ConvertOp> {
+struct ConvertOpConversion : public OpConversionPattern<mhlo::ConvertOp> {
   ConvertOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ConvertOp srcOp, ArrayRef<Value> rawOperands,
+      mhlo::ConvertOp srcOp, ArrayRef<Value> rawOperands,
       ConversionPatternRewriter &rewriter) const override {
     auto srcType = srcOp.operand().getType().cast<ShapedType>();
     auto dstType = srcOp.getResult().getType().cast<ShapedType>();
 
-    // The xla_hlo.convert op can have the same src and dst element types, in
+    // The mhlo.convert op can have the same src and dst element types, in
     // which case it just represents a static structural annotation of a shape
     // change, so it is just an identity op at runtime.
     if (srcType.getElementType() == dstType.getElementType()) {
-      return IdentityOpConversion<xla_hlo::ConvertOp>{rewriter.getContext()}
+      return IdentityOpConversion<mhlo::ConvertOp>{rewriter.getContext()}
           .matchAndRewrite(srcOp, rawOperands, rewriter);
     }
 
@@ -552,12 +552,12 @@
   // We rely on some additional HLO->std patterns and assume they
   // have been run already. In case they haven't we provide them here (useful
   // for standalone conversion testing).
-  xla_hlo::PopulateXlaToStdPatterns(&patterns, context);
+  mhlo::PopulateXlaToStdPatterns(&patterns, context);
 
-  // xla_hlo.convolution.
+  // mhlo.convolution.
   populateHLOConvToVMLAPatterns(context, patterns, typeConverter);
 
-  // xla_hlo.reduce and xla_hlo.reduce_window.
+  // mhlo.reduce and mhlo.reduce_window.
   populateHLOReductionToVMLAPatterns(context, patterns, typeConverter);
 
   // vmla.batch.matmul.pseudo
@@ -567,83 +567,81 @@
 
   // Simple 1:1 conversion patterns using the automated trait-based converter.
   // Used for HLO ops that have equivalent VMLA ops such as most arithmetic ops.
-  patterns.insert<VMLAOpConversion<xla_hlo::AddOp, IREE::VMLA::AddOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::AddOp, IREE::VMLA::AddOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::SubOp, IREE::VMLA::SubOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::SubOp, IREE::VMLA::SubOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::DivOp, IREE::VMLA::DivOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::DivOp, IREE::VMLA::DivOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::MulOp, IREE::VMLA::MulOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::MulOp, IREE::VMLA::MulOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::PowOp, IREE::VMLA::PowOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::PowOp, IREE::VMLA::PowOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::RemOp, IREE::VMLA::RemOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::RemOp, IREE::VMLA::RemOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::ShiftLeftOp, IREE::VMLA::ShlOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::ShiftLeftOp, IREE::VMLA::ShlOp>>(
       context, typeConverter);
   patterns.insert<
-      VMLAOpConversion<xla_hlo::ShiftRightArithmeticOp, IREE::VMLA::ShrOp>>(
+      VMLAOpConversion<mhlo::ShiftRightArithmeticOp, IREE::VMLA::ShrOp>>(
       context, typeConverter);
-  patterns
-      .insert<VMLAOpConversion<xla_hlo::ShiftRightLogicalOp, IREE::VMLA::ShrOp,
-                               VMLAOpSemantics::kForceUnsigned>>(context,
-                                                                 typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::AndOp, IREE::VMLA::AndOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::ShiftRightLogicalOp, IREE::VMLA::ShrOp,
+                                   VMLAOpSemantics::kForceUnsigned>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::OrOp, IREE::VMLA::OrOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::AndOp, IREE::VMLA::AndOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::XorOp, IREE::VMLA::XorOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::OrOp, IREE::VMLA::OrOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::ExpOp, IREE::VMLA::ExpOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::XorOp, IREE::VMLA::XorOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::LogOp, IREE::VMLA::LogOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::ExpOp, IREE::VMLA::ExpOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::FloorOp, IREE::VMLA::FloorOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::LogOp, IREE::VMLA::LogOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::RsqrtOp, IREE::VMLA::RsqrtOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::FloorOp, IREE::VMLA::FloorOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::SqrtOp, IREE::VMLA::SqrtOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::RsqrtOp, IREE::VMLA::RsqrtOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::CosOp, IREE::VMLA::CosOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::SqrtOp, IREE::VMLA::SqrtOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::SinOp, IREE::VMLA::SinOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::CosOp, IREE::VMLA::CosOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::TanhOp, IREE::VMLA::TanhOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::SinOp, IREE::VMLA::SinOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::Atan2Op, IREE::VMLA::Atan2Op>>(
+  patterns.insert<VMLAOpConversion<mhlo::TanhOp, IREE::VMLA::TanhOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::SelectOp, IREE::VMLA::SelectOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::Atan2Op, IREE::VMLA::Atan2Op>>(
+      context, typeConverter);
+  patterns.insert<VMLAOpConversion<mhlo::SelectOp, IREE::VMLA::SelectOp>>(
       context, typeConverter);
   patterns.insert<ConvertOpConversion>(context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::ReverseOp, IREE::VMLA::ReverseOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::ReverseOp, IREE::VMLA::ReverseOp>>(
+      context, typeConverter);
+  patterns.insert<VMLAOpConversion<mhlo::TransposeOp, IREE::VMLA::TransposeOp>>(
+      context, typeConverter);
+  patterns.insert<VMLAOpConversion<mhlo::PadOp, IREE::VMLA::PadOp>>(
       context, typeConverter);
   patterns
-      .insert<VMLAOpConversion<xla_hlo::TransposeOp, IREE::VMLA::TransposeOp>>(
+      .insert<VMLAOpConversion<mhlo::TorchIndexSelectOp, IREE::VMLA::GatherOp>>(
           context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::PadOp, IREE::VMLA::PadOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::AbsOp, IREE::VMLA::AbsOp>>(
       context, typeConverter);
-  patterns.insert<
-      VMLAOpConversion<xla_hlo::TorchIndexSelectOp, IREE::VMLA::GatherOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::NegOp, IREE::VMLA::NegOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::AbsOp, IREE::VMLA::AbsOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::MaxOp, IREE::VMLA::MaxOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::NegOp, IREE::VMLA::NegOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::MinOp, IREE::VMLA::MinOp>>(
       context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::MaxOp, IREE::VMLA::MaxOp>>(
-      context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::MinOp, IREE::VMLA::MinOp>>(
-      context, typeConverter);
-  patterns.insert<VMLAOpConversion<xla_hlo::ClampOp, IREE::VMLA::ClampOp>>(
+  patterns.insert<VMLAOpConversion<mhlo::ClampOp, IREE::VMLA::ClampOp>>(
       context, typeConverter);
 
   patterns.insert<CompareOpConversion>(context, typeConverter);
 
   // Ops that are only used for type information that we erase. We can elide
   // these entirely by just passing on their input values.
-  patterns.insert<IdentityOpConversion<xla_hlo::BitcastConvertOp>>(context);
-  patterns.insert<IdentityOpConversion<xla_hlo::CopyOp>>(context);
-  patterns.insert<IdentityOpConversion<xla_hlo::ReshapeOp>>(context);
-  patterns.insert<IdentityOpConversion<xla_hlo::DynamicReshapeOp>>(context);
+  patterns.insert<IdentityOpConversion<mhlo::BitcastConvertOp>>(context);
+  patterns.insert<IdentityOpConversion<mhlo::CopyOp>>(context);
+  patterns.insert<IdentityOpConversion<mhlo::ReshapeOp>>(context);
+  patterns.insert<IdentityOpConversion<mhlo::DynamicReshapeOp>>(context);
 
   // Conversions that don't have a 1:1 mapping, mostly involving buffer views
   // or transfers.

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertReductionOps.cpp b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertReductionOps.cpp
index f2d0161..1a2ddd3 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertReductionOps.cpp
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/ConvertReductionOps.cpp

@@ -36,21 +36,21 @@
 
 namespace {
 
-// Converts a simple xla_hlo.reduce op that performs independent individual
-// computations into a set of xla_hlo.reduce ops. This is an intermediate
+// Converts a simple mhlo.reduce op that performs independent individual
+// computations into a set of mhlo.reduce ops. This is an intermediate
 // conversion that may make it possible to use the much faster builtin VMLA
 // reduction ops.
 //
 // Only supports single dimensional reductions and assumes that unrolling has
 // been performed prior to conversion.
 struct SplitIndependentReductionOpConversion
-    : public OpConversionPattern<xla_hlo::ReduceOp> {
+    : public OpConversionPattern<mhlo::ReduceOp> {
   SplitIndependentReductionOpConversion(MLIRContext *context,
                                         TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ReduceOp srcOp, ArrayRef<Value> operands,
+      mhlo::ReduceOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     if (srcOp.dimensions().getNumElements() > 1) {
       srcOp.emitOpError() << "multi-dimensional reductions must be unrolled";
@@ -60,7 +60,7 @@
       return failure();
     }
     auto &block = srcOp.body().getBlocks().front();
-    xla_hlo::ReduceOp::Adaptor newOperands(operands);
+    mhlo::ReduceOp::Adaptor newOperands(operands);
     SmallVector<Value, 4> setResults;
     for (auto &op : block) {
       if (op.isKnownTerminator()) {
@@ -105,7 +105,7 @@
       // Create the new op for this set.
       Value operandArg = srcOp.operands()[opSetIndex];
       Value initArg = srcOp.init_values()[opSetIndex];
-      auto splitOp = rewriter.create<xla_hlo::ReduceOp>(
+      auto splitOp = rewriter.create<mhlo::ReduceOp>(
           op.getLoc(), ValueRange{operandArg}, ValueRange{initArg},
           srcOp.dimensionsAttr());
       auto *splitBlock = new Block();
@@ -116,7 +116,7 @@
         mapping.map(operand, splitBlock->addArgument(operand.getType()));
       }
       Operation *splitComputeOp = splitBuilder.clone(op, mapping);
-      splitBuilder.create<xla_hlo::ReturnOp>(
+      splitBuilder.create<mhlo::ReturnOp>(
           srcOp.getLoc(), ValueRange{*splitComputeOp->getResults().begin()});
       setResults.push_back(*splitOp.getResults().begin());
     }
@@ -128,7 +128,7 @@
   TypeConverter &typeConverter;
 };
 
-// Converts an xla_hlo.reduce with a single op to a builtin reduce op.
+// Converts an mhlo.reduce with a single op to a builtin reduce op.
 // This is meant to pair with the SplitIndependentReductionOpConversion that
 // tries to unfuse/divide combined reductions. If this cannot match then the
 // fallback path will be used and a VM loop will be emitted (slower, but can
@@ -136,14 +136,13 @@
 //
 // Only supports single dimensional reductions and assumes that unrolling has
 // been performed prior to conversion.
-struct BuiltinReduceOpConversion
-    : public OpConversionPattern<xla_hlo::ReduceOp> {
+struct BuiltinReduceOpConversion : public OpConversionPattern<mhlo::ReduceOp> {
   BuiltinReduceOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context, /*benefit=*/1000),
         typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ReduceOp srcOp, ArrayRef<Value> operands,
+      mhlo::ReduceOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     if (srcOp.dimensions().getNumElements() > 1) {
       srcOp.emitOpError() << "multi-dimensional reductions must be unrolled";
@@ -172,17 +171,17 @@
 
     auto &computeOp = *srcOp.body().front().begin();
     if (isa<mlir::AddIOp>(computeOp) || isa<mlir::AddFOp>(computeOp) ||
-        isa<xla_hlo::AddOp>(computeOp)) {
+        isa<mhlo::AddOp>(computeOp)) {
       rewriter.create<IREE::VMLA::ReduceSumOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape,
           rewriter.getI32IntegerAttr(dimension), dst, dstShape,
           TypeAttr::get(elementType));
-    } else if (isa<xla_hlo::MinOp>(computeOp)) {
+    } else if (isa<mhlo::MinOp>(computeOp)) {
       rewriter.create<IREE::VMLA::ReduceMinOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape,
           rewriter.getI32IntegerAttr(dimension), dst, dstShape,
           TypeAttr::get(elementType));
-    } else if (isa<xla_hlo::MaxOp>(computeOp)) {
+    } else if (isa<mhlo::MaxOp>(computeOp)) {
       rewriter.create<IREE::VMLA::ReduceMaxOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape,
           rewriter.getI32IntegerAttr(dimension), dst, dstShape,
@@ -199,17 +198,16 @@
   TypeConverter &typeConverter;
 };
 
-// Converts a generic xla_hlo.reduce to a VM loop.
+// Converts a generic mhlo.reduce to a VM loop.
 //
 // Only supports single dimensional reductions and assumes that unrolling has
 // been performed prior to conversion.
-struct GenericReduceOpConversion
-    : public OpConversionPattern<xla_hlo::ReduceOp> {
+struct GenericReduceOpConversion : public OpConversionPattern<mhlo::ReduceOp> {
   GenericReduceOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context), typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ReduceOp srcOp, ArrayRef<Value> operands,
+      mhlo::ReduceOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     if (srcOp.dimensions().getNumElements() > 1) {
       srcOp.emitOpError() << "multi-dimensional reductions must be unrolled";
@@ -225,13 +223,13 @@
 };
 
 struct BuiltinPoolingOpConversion
-    : public OpConversionPattern<xla_hlo::ReduceWindowOp> {
+    : public OpConversionPattern<mhlo::ReduceWindowOp> {
   BuiltinPoolingOpConversion(MLIRContext *context, TypeConverter &typeConverter)
       : OpConversionPattern(context, /*benefit=*/1000),
         typeConverter(typeConverter) {}
 
   LogicalResult matchAndRewrite(
-      xla_hlo::ReduceWindowOp srcOp, ArrayRef<Value> operands,
+      mhlo::ReduceWindowOp srcOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     if (srcOp.body().getBlocks().size() > 1) {
       // Control flow within the computation is not supported; bail to fallback.
@@ -269,21 +267,21 @@
 
     auto &computeOp = *srcOp.body().front().begin();
     if (isa<mlir::AddIOp>(computeOp) || isa<mlir::AddFOp>(computeOp) ||
-        isa<xla_hlo::AddOp>(computeOp)) {
+        isa<mhlo::AddOp>(computeOp)) {
       rewriter.create<IREE::VMLA::PoolingSumOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape, dst,
           dstShape, TypeAttr::get(elementType),
           rewriter.getI32VectorAttr(windowDimensions),
           rewriter.getI32VectorAttr(windowStrides),
           rewriter.getI32VectorAttr(padding));
-    } else if (isa<xla_hlo::MinOp>(computeOp)) {
+    } else if (isa<mhlo::MinOp>(computeOp)) {
       rewriter.create<IREE::VMLA::PoolingMinOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape, dst,
           dstShape, TypeAttr::get(elementType),
           rewriter.getI32VectorAttr(windowDimensions),
           rewriter.getI32VectorAttr(windowStrides),
           rewriter.getI32VectorAttr(padding));
-    } else if (isa<xla_hlo::MaxOp>(computeOp)) {
+    } else if (isa<mhlo::MaxOp>(computeOp)) {
       rewriter.create<IREE::VMLA::PoolingMaxOp>(
           srcOp.getLoc(), operand, operandShape, initValue, initValueShape, dst,
           dstShape, TypeAttr::get(elementType),

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/concatenate.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/concatenate.mlir
index 85e7234..abbfd71 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/concatenate.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/concatenate.mlir

@@ -16,7 +16,7 @@
   // CHECK-SAME: src_indices = [%c0, %c0],
   // CHECK-SAME: out %[[DST]](%rs2_5 : !shapex.ranked_shape<[2,5]>),
   // CHECK-SAME: dst_indices = [%c0, %c2], lengths = [%c2, %c3] : i32
-  %0 = "xla_hlo.concatenate"(%arg0, %c0) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>) -> tensor<2x5xi32>
+  %0 = "mhlo.concatenate"(%arg0, %c0) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>) -> tensor<2x5xi32>
   // CHECK-NEXT: return %[[DST]]
   return %0: tensor<2x5xi32>
 }
@@ -39,7 +39,7 @@
   // CHECK-SAME: src_indices = [%c0, %c0],
   // CHECK-SAME: out %[[DST]](%rs2_5 : !shapex.ranked_shape<[2,5]>),
   // CHECK-SAME: dst_indices = [%c0, %c3], lengths = [%c2, %c2] : i32
-  %0 = "xla_hlo.concatenate"(%arg0, %c0) {dimension = 1} : (tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x5xi32>
+  %0 = "mhlo.concatenate"(%arg0, %c0) {dimension = 1} : (tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x5xi32>
   // CHECK-NEXT: return %[[DST]]
   return %0: tensor<2x5xi32>
 }
@@ -69,7 +69,7 @@
   // CHECK-SAME: src_indices = [%c0, %c0],
   // CHECK-SAME: out %[[DST]](%rs2_7 : !shapex.ranked_shape<[2,7]>),
   // CHECK-SAME: dst_indices = [%c0, %c5], lengths = [%c2, %c2] : i32
-  %0 = "xla_hlo.concatenate"(%arg0, %c0, %c1) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x7xi32>
+  %0 = "mhlo.concatenate"(%arg0, %c0, %c1) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x7xi32>
   // CHECK-NEXT: return %[[DST]]
   return %0: tensor<2x7xi32>
 }
@@ -92,7 +92,7 @@
   // CHECK-SAME: src_indices = [%c0, %c0],
   // CHECK-SAME: out %[[DST]](%rs4_2 : !shapex.ranked_shape<[4,2]>),
   // CHECK-SAME: dst_indices = [%c2, %c0], lengths = [%c2, %c2] : i32
-  %0 = "xla_hlo.concatenate"(%arg0, %c0) {dimension = 0} : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<4x2xi32>
+  %0 = "mhlo.concatenate"(%arg0, %c0) {dimension = 0} : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<4x2xi32>
   // CHECK-NEXT: return %[[DST]]
   return %0: tensor<4x2xi32>
 }

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/conv.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/conv.mlir
index 55c81a1..0d1635d 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/conv.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/conv.mlir

@@ -9,7 +9,7 @@
   // CHECK-SAME: padding = dense<[1, 2, 2, 2]> : vector<4xi32>,
   // CHECK-SAME: rhs_dilation = dense<1> : vector<2xi32>,
   // CHECK-SAME: window_strides = dense<1> : vector<2xi32>}
-  %2 = "xla_hlo.convolution"(%arg0, %arg1) {
+  %2 = "mhlo.convolution"(%arg0, %arg1) {
         batch_group_count = 1 : i64,
         dimension_numbers = {
           input_batch_dimension = 0 : i64,

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/convert.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/convert.mlir
index 3bc862d..95759c6 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/convert.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/convert.mlir

@@ -3,13 +3,13 @@
 // CHECK-LABEL: func @basic
 func @basic(%arg0 : tensor<5xf32>) -> (tensor<5xi32>) attributes { sym_visibility = "private" } {
   // CHECK: vmla.convert
-  %0 = "xla_hlo.convert"(%arg0) : (tensor<5xf32>) -> tensor<5xi32>
+  %0 = "mhlo.convert"(%arg0) : (tensor<5xf32>) -> tensor<5xi32>
   return %0 : tensor<5xi32>
 }
 
 // CHECK-LABEL: func @noop
 func @noop(%arg0 : tensor<?xf32>) -> (tensor<5xf32>) attributes { sym_visibility = "private" } {
   // CHECK: return %arg0
-  %0 = "xla_hlo.convert"(%arg0) : (tensor<?xf32>) -> tensor<5xf32>
+  %0 = "mhlo.convert"(%arg0) : (tensor<?xf32>) -> tensor<5xf32>
   return %0 : tensor<5xf32>
 }

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/dynamic_slice.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/dynamic_slice.mlir
index 8f35b91..6af4268 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/dynamic_slice.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/dynamic_slice.mlir

@@ -19,7 +19,7 @@
   // CHECK-SAME: src_indices = [%[[SRC_INDEX_0]], %[[SRC_INDEX_1]]],
   // CHECK-SAME: out %[[DST]](%rs3_4 : !shapex.ranked_shape<[3,4]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c3, %c4] : i32
-  %result = "xla_hlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
+  %result = "mhlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
     slice_sizes = dense<[3, 4]> : tensor<2xi64>
   } : (tensor<3x4xi32>, tensor<i64>, tensor<i64>) -> tensor<3x4xi32>
   // CHECK-NEXT: return %[[DST]]
@@ -47,7 +47,7 @@
   // CHECK-SAME: src_indices = [%[[SRC_INDEX_0]], %[[SRC_INDEX_1]]],
   // CHECK-SAME: out %[[DST]](%rs1_4 : !shapex.ranked_shape<[1,4]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c1, %c4] : i32
-  %result = "xla_hlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
+  %result = "mhlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
     slice_sizes = dense<[1, 4]> : tensor<2xi64>
   } : (tensor<3x4xi32>, tensor<i64>, tensor<i64>) -> tensor<1x4xi32>
   // CHECK-NEXT: return %[[DST]]
@@ -75,7 +75,7 @@
   // CHECK-SAME: src_indices = [%[[SRC_INDEX_0]], %[[SRC_INDEX_1]]],
   // CHECK-SAME: out %[[DST]](%rs1_2 : !shapex.ranked_shape<[1,2]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c1, %c2] : i32
-  %result = "xla_hlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
+  %result = "mhlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
     slice_sizes = dense<[1, 2]> : tensor<2xi64>
   } : (tensor<3x4xi32>, tensor<i64>, tensor<i64>) -> tensor<1x2xi32>
   // CHECK-NEXT: return %[[DST]]
@@ -103,7 +103,7 @@
   // CHECK-SAME: src_indices = [%[[SRC_INDEX_0]], %[[SRC_INDEX_1]]],
   // CHECK-SAME: out %[[DST]](%rs2_4 : !shapex.ranked_shape<[2,4]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c2, %c4] : i32
-  %result = "xla_hlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
+  %result = "mhlo.dynamic-slice"(%input, %src_idx_1, %src_idx_2) {
     slice_sizes = dense<[2, 4]> : tensor<2xi64>
   } : (tensor<3x4xi32>, tensor<i64>, tensor<i64>) -> tensor<2x4xi32>
   // CHECK-NEXT: return %[[DST]]

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/gather.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/gather.mlir
index eeb89de..579c220 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/gather.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/gather.mlir

@@ -14,7 +14,7 @@
   // CHECK-SAME: src_indices = [%[[INDEX0]], %c0, %c0],
   // CHECK-SAME: out %[[DST]](%[[DST_SHAPE]] : !shapex.ranked_shape<[1,1,5]>),
   // CHECK-SAME: dst_indices = [%c0, %c0, %c0], lengths = [%c1, %c1, %c5] : i32
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,
@@ -47,7 +47,7 @@
   // CHECK-SAME: src_indices = [%[[INDEX0]], %[[INDEX1]], %[[INDEX2]]],
   // CHECK-SAME: out %[[DST]](%[[DST_SHAPE]] : !shapex.ranked_shape<[1,2,3]>),
   // CHECK-SAME: dst_indices = [%c0, %c0, %c0], lengths = [%c1, %c2, %c3] : f32
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,
@@ -65,8 +65,8 @@
 // expected-error@-3 {{conversion to the VMLA dialect failed}}
 func @gather_not_lowered_axis_1(%input : tensor<5x2x3xf32>, %start_indices : tensor<2x2xi64>) attributes { sym_visibility = "private" } {
   // expected-remark@+2 {{couldn't lower gather}}
-  // expected-error@+1 {{failed to legalize operation 'xla_hlo.gather' that was explicitly marked illegal}}
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  // expected-error@+1 {{failed to legalize operation 'mhlo.gather' that was explicitly marked illegal}}
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 1 : i64,
@@ -83,8 +83,8 @@
 // expected-error@-3 {{conversion to the VMLA dialect failed}}
 func @gather_not_lowered_collapse(%input : tensor<5x2x3xf32>, %start_indices : tensor<2x2xi64>) attributes { sym_visibility = "private" } {
   // expected-remark@+2 {{couldn't lower gather}}
-  // expected-error@+1 {{failed to legalize operation 'xla_hlo.gather' that was explicitly marked illegal}}
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  // expected-error@+1 {{failed to legalize operation 'mhlo.gather' that was explicitly marked illegal}}
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<1> : tensor<1xi64>,
       index_vector_dim = 0 : i64,
@@ -101,8 +101,8 @@
 // expected-error@-3 {{conversion to the VMLA dialect failed}}
 func @gather_not_lowered_transposes(%input : tensor<5x2x3xf32>, %start_indices : tensor<2x2xi64>) attributes { sym_visibility = "private" } {
   // expected-remark@+2 {{couldn't lower gather}}
-  // expected-error@+1 {{failed to legalize operation 'xla_hlo.gather' that was explicitly marked illegal}}
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  // expected-error@+1 {{failed to legalize operation 'mhlo.gather' that was explicitly marked illegal}}
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,
@@ -119,8 +119,8 @@
 // expected-error@-3 {{conversion to the VMLA dialect failed}}
 func @gather_not_lowered_batch_dims(%input : tensor<5x2x3xf32>, %start_indices : tensor<2x2xi64>) attributes { sym_visibility = "private" } {
   // expected-remark@+2 {{couldn't lower gather}}
-  // expected-error@+1 {{failed to legalize operation 'xla_hlo.gather' that was explicitly marked illegal}}
-  %0 = "xla_hlo.gather"(%input, %start_indices) {
+  // expected-error@+1 {{failed to legalize operation 'mhlo.gather' that was explicitly marked illegal}}
+  %0 = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/math_ops.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/math_ops.mlir
index 70e3831..a30898d 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/math_ops.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/math_ops.mlir

@@ -5,7 +5,7 @@
   // CHECK-NEXT: %[[BUF_SZ:.+]] = constant 4
   // CHECK-NEXT: %[[BUF:.+]] = vmla.buffer.alloc byte_length = %[[BUF_SZ]] : !vmla.buffer
   // CHECK-NEXT: vmla.abs %arg0, out %[[BUF]] : f32
-  %0 = "xla_hlo.abs"(%arg0) : (tensor<f32>) -> tensor<f32>
+  %0 = "mhlo.abs"(%arg0) : (tensor<f32>) -> tensor<f32>
   // CHECK-NEXT: return %[[BUF]]
   return %0 : tensor<f32>
 }
@@ -17,7 +17,7 @@
   // CHECK-NEXT: %[[BUF_SZ:.+]] = constant 16
   // CHECK-NEXT: %[[BUF:.+]] = vmla.buffer.alloc byte_length = %[[BUF_SZ]] : !vmla.buffer
   // CHECK-NEXT: vmla.abs %arg0, out %[[BUF]] : f32
-  %0 = "xla_hlo.abs"(%arg0) : (tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.abs"(%arg0) : (tensor<4xf32>) -> tensor<4xf32>
   // CHECK-NEXT: return %[[BUF]]
   return %0 : tensor<4xf32>
 }
@@ -29,7 +29,7 @@
   // CHECK-NEXT: %[[BUF_SZ:.+]] = constant 16
   // CHECK-NEXT: %[[BUF:.+]] = vmla.buffer.alloc byte_length = %[[BUF_SZ]] : !vmla.buffer
   // CHECK-NEXT: vmla.clamp %arg0, %arg1, %arg2, out %[[BUF]] : f32
-  %0 = "xla_hlo.clamp"(%arg0, %arg1, %arg2) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.clamp"(%arg0, %arg1, %arg2) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   // CHECK-NEXT: return %[[BUF]]
   return %0 : tensor<4xf32>
 }

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce.mlir
index 9f9971c..2590f4c 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce.mlir

@@ -13,10 +13,10 @@
   // CHECK-SAME: %[[INIT]](%[[INIT_SHAPE]] : !shapex.ranked_shape<[]>),
   // CHECK-SAME: out %[[DST]](%[[DST_SHAPE]] : !shapex.ranked_shape<[4]>)
   // CHECK-SaME: {dimension = 1 : i32} : f32
-  %0 = "xla_hlo.reduce"(%arg0, %cst) ( {
+  %0 = "mhlo.reduce"(%arg0, %cst) ( {
   ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+    %1 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%1) : (tensor<f32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<f32>) -> tensor<4xf32>
   // CHECK-NEXT: return %[[DST]] : !vmla.buffer
   return %0 : tensor<4xf32>
@@ -46,11 +46,11 @@
   // CHECK-SAME: %[[CST1]](%[[SCALAR_SHAPE]] : !shapex.ranked_shape<[]>),
   // CHECK-SAME: out %[[RET1]](%[[RESULT_SHAPE]] : !shapex.ranked_shape<[4]>)
   // CHECK-SaME: {dimension = 1 : i32} : f32
-  %2, %3 = "xla_hlo.reduce"(%arg0, %arg1, %0, %1) ( {
+  %2, %3 = "mhlo.reduce"(%arg0, %arg1, %0, %1) ( {
   ^bb0(%arg0_lhs : tensor<f32>, %arg1_lhs : tensor<f32>, %arg0_rhs : tensor<f32>, %arg1_rhs : tensor<f32>):
-    %4 = xla_hlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
-    %5 = xla_hlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
-    "xla_hlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
+    %4 = mhlo.add %arg0_lhs, %arg0_rhs : tensor<f32>
+    %5 = mhlo.add %arg1_lhs, %arg1_rhs : tensor<f32>
+    "mhlo.return"(%4, %5) : (tensor<f32>, tensor<f32>) -> ()
   }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor<4x8xf32>, tensor<4x8xf32>, tensor<f32>, tensor<f32>) -> (tensor<4xf32>, tensor<4xf32>)
   // CHECK-NEXT: return %[[RET0]], %[[RET1]] : !vmla.buffer, !vmla.buffer
   return %2, %3 : tensor<4xf32>, tensor<4xf32>

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce_window.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce_window.mlir
index 2e83290..692bcfc 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce_window.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reduce_window.mlir

@@ -5,10 +5,10 @@
     attributes { sym_visibility = "private" } {
   // CHECK: vmla.pooling.max
   %cst = constant dense<0.000000e+00> : tensor<f32>
-  %0 = "xla_hlo.reduce_window"(%arg0, %cst) ( {
+  %0 = "mhlo.reduce_window"(%arg0, %cst) ( {
   ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-    %1 = xla_hlo.maximum %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+    %1 = mhlo.maximum %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%1) : (tensor<f32>) -> ()
   }) {window_dimensions = dense<[1, 2, 3, 1]> : tensor<4xi64>,
       window_strides = dense<1> : tensor<4xi64>
   } : (tensor<1x4x6x1xf32>, tensor<f32>) -> tensor<1x2x2x1xf32>
@@ -22,10 +22,10 @@
     attributes { sym_visibility = "private" } {
   // CHECK: vmla.pooling.min
   %cst = constant dense<0> : tensor<i32>
-  %0 = "xla_hlo.reduce_window"(%arg0, %cst) ( {
+  %0 = "mhlo.reduce_window"(%arg0, %cst) ( {
   ^bb0(%arg1: tensor<i32>, %arg2: tensor<i32>):  // no predecessors
-    %1 = xla_hlo.minimum %arg1, %arg2 : tensor<i32>
-    "xla_hlo.return"(%1) : (tensor<i32>) -> ()
+    %1 = mhlo.minimum %arg1, %arg2 : tensor<i32>
+    "mhlo.return"(%1) : (tensor<i32>) -> ()
   }) {window_dimensions = dense<[1, 2, 3, 1]> : tensor<4xi64>,
       window_strides = dense<1> : tensor<4xi64>
   } : (tensor<1x4x6x1xi32>, tensor<i32>) -> tensor<1x2x2x1xi32>
@@ -39,10 +39,10 @@
     { sym_visibility = "private" } {
   // CHECK: vmla.pooling.sum
   %cst = constant dense<0.000000e+00> : tensor<f32>
-  %0 = "xla_hlo.reduce_window"(%arg0, %cst) ( {
+  %0 = "mhlo.reduce_window"(%arg0, %cst) ( {
   ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+    %1 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%1) : (tensor<f32>) -> ()
   }) {window_dimensions = dense<[2, 3]> : tensor<2xi64>,
       window_strides = dense<1> : tensor<2xi64>,
       padding = dense<[[1, 0], [2, 0]]> : tensor<2x2xi64>

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reshape.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reshape.mlir
index 9ffba78..939c1d8 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reshape.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/reshape.mlir

@@ -3,7 +3,7 @@
 // CHECK-LABEL: @reshape_bypass
 func @reshape_bypass(%arg0 : tensor<3x2xi32>) -> tensor<6xi32> attributes { sym_visibility = "private" } {
   // CHECK-NEXT: return %arg0
-  %0 = "xla_hlo.reshape"(%arg0) : (tensor<3x2xi32>) -> tensor<6xi32>
+  %0 = "mhlo.reshape"(%arg0) : (tensor<3x2xi32>) -> tensor<6xi32>
   return %0 : tensor<6xi32>
 }
 
@@ -12,7 +12,7 @@
 // CHECK-LABEL: @reshape_copy
 func @reshape_copy(%arg0 : tensor<3x2xi32>) -> (tensor<3x2xi32>, tensor<6xi32>) attributes { sym_visibility = "private" } {
   // CHECK-NEXT: %0 = vmla.buffer.clone %arg0 : !vmla.buffer
-  %0 = "xla_hlo.reshape"(%arg0) : (tensor<3x2xi32>) -> tensor<6xi32>
+  %0 = "mhlo.reshape"(%arg0) : (tensor<3x2xi32>) -> tensor<6xi32>
   // CHECK-NEXT: return %arg0, %0
   return %arg0, %0 : tensor<3x2xi32>, tensor<6xi32>
 }

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/slice.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/slice.mlir
index eebb353..52e6fe4 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/slice.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/slice.mlir

@@ -9,7 +9,7 @@
   // CHECK-SAME: src_indices = [%c1, %c0],
   // CHECK-SAME: out %[[DST]](%rs1_4 : !shapex.ranked_shape<[1,4]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c1, %c4] : i32
-  %result = "xla_hlo.slice"(%arg0) {
+  %result = "mhlo.slice"(%arg0) {
     start_indices = dense<[1, 0]> : tensor<2xi64>,
     limit_indices = dense<[2, 4]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -29,7 +29,7 @@
   // CHECK-SAME: src_indices = [%c1, %c1],
   // CHECK-SAME: out %[[DST]](%rs1_2 : !shapex.ranked_shape<[1,2]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c1, %c2] : i32
-  %result = "xla_hlo.slice"(%arg0) {
+  %result = "mhlo.slice"(%arg0) {
     start_indices = dense<[1, 1]> : tensor<2xi64>,
     limit_indices = dense<[2, 3]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -49,7 +49,7 @@
   // CHECK-SAME: src_indices = [%c1, %c0],
   // CHECK-SAME: out %[[DST]](%rs2_4 : !shapex.ranked_shape<[2,4]>),
   // CHECK-SAME: dst_indices = [%c0, %c0], lengths = [%c2, %c4] : i32
-  %result = "xla_hlo.slice"(%arg0) {
+  %result = "mhlo.slice"(%arg0) {
     start_indices = dense<[1, 0]> : tensor<2xi64>,
     limit_indices = dense<[3, 4]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>

diff --git a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/transpose.mlir b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/transpose.mlir
index 4dcef6b..b15deb7 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/transpose.mlir
+++ b/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/test/transpose.mlir

@@ -12,7 +12,7 @@
   // CHECK-SAME: %[[SRC]](%[[SRC_SHAPE]] : !shapex.ranked_shape<[7,24,10]>),
   // CHECK-SAME: out %[[DST]](%[[DST_SHAPE]] : !shapex.ranked_shape<[24,7,10]>)
   // CHECK-SAME: {permutation = dense<[1, 0, 2]> : tensor<3xi32>} : f32
-  %0 = "xla_hlo.transpose"(%input) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<7x24x10xf32>) -> tensor<24x7x10xf32>
+  %0 = "mhlo.transpose"(%input) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<7x24x10xf32>) -> tensor<24x7x10xf32>
   // CHECK-NEXT: return %[[DST]]
   return %0 : tensor<24x7x10xf32>
 }

diff --git a/iree/compiler/Dialect/VMLA/README.md b/iree/compiler/Dialect/VMLA/README.md
index 5899e76..c5832ad 100644
--- a/iree/compiler/Dialect/VMLA/README.md
+++ b/iree/compiler/Dialect/VMLA/README.md

@@ -102,9 +102,9 @@
 See [HLOToVMLA](/iree/compiler/Dialect/VMLA/Conversion/HLOToVMLA/) for examples
 of the former. Most ops can use the `VMLAOpConversion` helper to automatically
 convert between ops so long as they match in values and attributes (for example,
-`xla_hlo.add` can be trivially converted to `vmla.add`). Examples of more
-complex ops that may require additional IR to be emitted or attributes to be
-mapped can be seen in there as well.
+`mhlo.add` can be trivially converted to `vmla.add`). Examples of more complex
+ops that may require additional IR to be emitted or attributes to be mapped can
+be seen in there as well.
 
 You can add tests for your conversion as needed under `test/` in the appropriate
 dialect-specific conversion folder.

diff --git a/iree/compiler/Dialect/VMLA/Transforms/Conversion.cpp b/iree/compiler/Dialect/VMLA/Transforms/Conversion.cpp
index 656ace3..84ce1d0 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/Conversion.cpp
+++ b/iree/compiler/Dialect/VMLA/Transforms/Conversion.cpp

@@ -78,7 +78,7 @@
     VMLAConversionTarget conversionTarget(context, typeConverter);
 
     // Ensure all input dialects go away.
-    conversionTarget.addIllegalDialect<xla_hlo::XlaHloDialect>();
+    conversionTarget.addIllegalDialect<mhlo::XlaHloDialect>();
     conversionTarget.addIllegalDialect<IREE::HAL::HALDialect>();
 
     OwningRewritePatternList conversionPatterns;
@@ -114,7 +114,7 @@
     // TODO(silvasean): Legalize ToExtentTensorOp and FromExtentTensorOp.
     conversionTarget.addIllegalOp<Shape::FromExtentTensorOp>();
     // RankedBroadcastInDimOp is an logically something that should be an
-    // xla_hlo op (or in a dialect at a similar level of abstraction), but since
+    // mhlo op (or in a dialect at a similar level of abstraction), but since
     // it isn't technically in that dialect, we need to special-case mark it as
     // illegal here.
     // TODO(silvasean): Reconcile the dialect layering here.

diff --git a/iree/compiler/Dialect/VMLA/Transforms/Passes.cpp b/iree/compiler/Dialect/VMLA/Transforms/Passes.cpp
index e07d530..d29b463 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/VMLA/Transforms/Passes.cpp

@@ -33,7 +33,7 @@
   // ---------------------------------------------------------------------------
   // Inline and flatten structured control flow to our CFG.
   // ---------------------------------------------------------------------------
-  passManager.addNestedPass<FuncOp>(xla_hlo::createLegalizeControlFlowPass());
+  passManager.addNestedPass<FuncOp>(mhlo::createLegalizeControlFlowPass());
 
   // Perform inlining and cleanup after CFG manipulation.
   passManager.addPass(createInlinerPass());

diff --git a/iree/compiler/Dialect/VMLA/Transforms/PreConversionLowering.cpp b/iree/compiler/Dialect/VMLA/Transforms/PreConversionLowering.cpp
index 6d6333e..a75f2e6 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/PreConversionLowering.cpp
+++ b/iree/compiler/Dialect/VMLA/Transforms/PreConversionLowering.cpp

@@ -42,13 +42,13 @@
 
 namespace {
 
-// Convert instances of `xla_hlo.dot` to `xla_hlo.dot_general`.
+// Convert instances of `mhlo.dot` to `mhlo.dot_general`.
 //
 // TODO(silvasean): This logically is part of a future HLO client -> HLO server
-// type of pass in the xla_hlo dialect proper.
-struct LowerDotOp : public OpRewritePattern<xla_hlo::DotOp> {
+// type of pass in the mhlo dialect proper.
+struct LowerDotOp : public OpRewritePattern<mhlo::DotOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::DotOp op,
+  LogicalResult matchAndRewrite(mhlo::DotOp op,
                                 PatternRewriter &rewriter) const override {
     Value lhs = op.lhs();
     Value rhs = op.rhs();
@@ -66,13 +66,13 @@
                                         rewriter.getIntegerType(64));
       return DenseIntElementsAttr::get(type, integers);
     };
-    auto dimensionNumbers = xla_hlo::DotDimensionNumbers::get(
+    auto dimensionNumbers = mhlo::DotDimensionNumbers::get(
         /*lhs_batching_dimensions=*/make1DElementsAttr({}),
         /*rhs_batching_dimensions=*/make1DElementsAttr({}),
         /*lhs_contracting_dimensions=*/make1DElementsAttr({1}),
         /*rhs_contracting_dimensions=*/make1DElementsAttr({0}),
         rewriter.getContext());
-    rewriter.replaceOpWithNewOp<xla_hlo::DotGeneralOp>(
+    rewriter.replaceOpWithNewOp<mhlo::DotGeneralOp>(
         op, op.getType(), lhs, rhs, dimensionNumbers,
         op.precision_config().hasValue() ? op.precision_config().getValue()
                                          : nullptr);
@@ -95,9 +95,9 @@
 // VMLA::BatchMatMulPseudoOp which represents this transformation.
 //
 // TODO(silvasean): Move this to a "prepare" pass and test separately.
-struct LowerDotGeneralOp : public OpRewritePattern<xla_hlo::DotGeneralOp> {
+struct LowerDotGeneralOp : public OpRewritePattern<mhlo::DotGeneralOp> {
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::DotGeneralOp op,
+  LogicalResult matchAndRewrite(mhlo::DotGeneralOp op,
                                 PatternRewriter &rewriter) const override {
     Value lhs = op.lhs();
     Value rhs = op.rhs();
@@ -107,7 +107,7 @@
     if (!lhsType || !rhsType) {
       return rewriter.notifyMatchFailure(op, "requires ranked types");
     }
-    xla_hlo::DotDimensionNumbers dimNumbers = op.dot_dimension_numbers();
+    mhlo::DotDimensionNumbers dimNumbers = op.dot_dimension_numbers();
     auto extract1DVector = [](DenseIntElementsAttr elements) {
       SmallVector<int64_t, 6> ret;
       for (const APInt &element : elements) {
@@ -181,7 +181,7 @@
       }
       auto transposeType =
           RankedTensorType::get(transposeStaticShape, elementType);
-      auto transpose = rewriter.create<xla_hlo::TransposeOp>(
+      auto transpose = rewriter.create<mhlo::TransposeOp>(
           op.getLoc(), transposeType, value, make1DElementsAttr(permutation));
 
       SmallVector<Value, 6> reshapeShape;
@@ -199,7 +199,7 @@
           reshapeShape);
       auto reshapeShapeExtentTensor = rewriter.create<Shape::ToExtentTensorOp>(
           op.getLoc(), reshapeRankedShape);
-      value = rewriter.create<xla_hlo::DynamicReshapeOp>(
+      value = rewriter.create<mhlo::DynamicReshapeOp>(
           op.getLoc(), reshapeType, transpose, reshapeShapeExtentTensor);
     };
     SmallVector<Value, 6> batchingDimExtents;
@@ -220,7 +220,7 @@
         op.getLoc(), dstType, lhs, rhs);
     RankedTensorType transposeType = RankedTensorType::get(
         {dstStaticShape[0], dstStaticShape[2], dstStaticShape[1]}, elementType);
-    auto transpose = rewriter.create<xla_hlo::TransposeOp>(
+    auto transpose = rewriter.create<mhlo::TransposeOp>(
         op.getLoc(), transposeType, dst, make1DElementsAttr({0, 2, 1}));
     auto reshapeShape = batchingDimExtents;
     reshapeShape.append(lhsFreeDimExtents.begin(), lhsFreeDimExtents.end());
@@ -237,17 +237,16 @@
         reshapeShape);
     auto reshapeShapeExtentTensor = rewriter.create<Shape::ToExtentTensorOp>(
         op.getLoc(), reshapeRankedShape);
-    rewriter.replaceOpWithNewOp<xla_hlo::DynamicReshapeOp>(
+    rewriter.replaceOpWithNewOp<mhlo::DynamicReshapeOp>(
         op, op.getType(), transpose, reshapeShapeExtentTensor);
     return success();
   }
 };
 
-class LowerBroadcastInDimOp
-    : public OpRewritePattern<xla_hlo::BroadcastInDimOp> {
+class LowerBroadcastInDimOp : public OpRewritePattern<mhlo::BroadcastInDimOp> {
  public:
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::BroadcastInDimOp op,
+  LogicalResult matchAndRewrite(mhlo::BroadcastInDimOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = op.getType().cast<RankedTensorType>();
     auto shapeType =
@@ -260,17 +259,17 @@
   }
 };
 
-// Lower xla_hlo::BroadcastOp via xla_hlo::BroadcastInDimOp.
-class LowerBroadcastOp : public OpRewritePattern<xla_hlo::BroadcastOp> {
+// Lower mhlo::BroadcastOp via mhlo::BroadcastInDimOp.
+class LowerBroadcastOp : public OpRewritePattern<mhlo::BroadcastOp> {
  public:
   using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(xla_hlo::BroadcastOp op,
+  LogicalResult matchAndRewrite(mhlo::BroadcastOp op,
                                 PatternRewriter &rewriter) const override {
     auto type = op.getOperand().getType().cast<RankedTensorType>();
     auto resultType = op.getType().cast<RankedTensorType>();
     auto broadcastDimensions = llvm::to_vector<6>(llvm::seq<int64_t>(
         resultType.getRank() - type.getRank(), resultType.getRank()));
-    rewriter.replaceOpWithNewOp<xla_hlo::BroadcastInDimOp>(
+    rewriter.replaceOpWithNewOp<mhlo::BroadcastInDimOp>(
         op, op.getType(), op.getOperand(),
         rewriter.getI64TensorAttr(broadcastDimensions));
     return success();
@@ -286,16 +285,16 @@
     ConversionTarget target(*context);
     target.addLegalDialect<StandardOpsDialect>();
     target.addLegalDialect<IREE::VMLA::VMLADialect>();
-    target.addLegalDialect<xla_hlo::XlaHloDialect>();
+    target.addLegalDialect<mhlo::XlaHloDialect>();
     target.addLegalDialect<ShapeDialect>();
 
-    target.addIllegalOp<xla_hlo::DotGeneralOp>();
+    target.addIllegalOp<mhlo::DotGeneralOp>();
     patterns.insert<LowerDotGeneralOp>(context);
-    target.addIllegalOp<xla_hlo::DotOp>();
+    target.addIllegalOp<mhlo::DotOp>();
     patterns.insert<LowerDotOp>(context);
-    target.addIllegalOp<xla_hlo::BroadcastInDimOp>();
+    target.addIllegalOp<mhlo::BroadcastInDimOp>();
     patterns.insert<LowerBroadcastInDimOp>(context);
-    target.addIllegalOp<xla_hlo::BroadcastOp>();
+    target.addIllegalOp<mhlo::BroadcastOp>();
     patterns.insert<LowerBroadcastOp>(context);
 
     if (failed(applyPartialConversion(getOperation(), target, patterns))) {

diff --git a/iree/compiler/Dialect/VMLA/Transforms/UnrollReductions.cpp b/iree/compiler/Dialect/VMLA/Transforms/UnrollReductions.cpp
index 245c3b0..289ce23 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/UnrollReductions.cpp
+++ b/iree/compiler/Dialect/VMLA/Transforms/UnrollReductions.cpp

@@ -24,10 +24,10 @@
 
 namespace {
 
-// Unrolls a multi-dimensional xla_hlo.reduce op into one xla_hlo.reduce op per
+// Unrolls a multi-dimensional mhlo.reduce op into one mhlo.reduce op per
 // dimension. The XLA operation semantics state that this is a valid
 // transformation.
-void unrollReduceOp(xla_hlo::ReduceOp reduceOp) {
+void unrollReduceOp(mhlo::ReduceOp reduceOp) {
   // Create one op per dimension being reduced.
   // We'll do this by chaining the original input through with the temporary
   // reduction results. The results we end up with will be the originally
@@ -44,7 +44,7 @@
     // Create the new reduction using the results of the previous operation.
     auto singleAttrType =
         RankedTensorType::get({1}, builder.getIntegerType(64));
-    auto singleReduceOp = builder.create<xla_hlo::ReduceOp>(
+    auto singleReduceOp = builder.create<mhlo::ReduceOp>(
         reduceOp.getLoc(), temps, reduceOp.init_values(),
         DenseIntElementsAttr::get(singleAttrType, {dimension}));
     BlockAndValueMapping mapping;
@@ -66,7 +66,7 @@
  public:
   void runOnFunction() override {
     for (auto &block : getFunction()) {
-      auto reduceOps = llvm::to_vector<4>(block.getOps<xla_hlo::ReduceOp>());
+      auto reduceOps = llvm::to_vector<4>(block.getOps<mhlo::ReduceOp>());
       for (auto reduceOp : reduceOps) {
         if (reduceOp.dimensions().getNumElements() > 1) {
           unrollReduceOp(reduceOp);

diff --git a/iree/compiler/Dialect/VMLA/Transforms/test/pre_conversion_lowering.mlir b/iree/compiler/Dialect/VMLA/Transforms/test/pre_conversion_lowering.mlir
index ecd94f7..d21bfda 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/test/pre_conversion_lowering.mlir
+++ b/iree/compiler/Dialect/VMLA/Transforms/test/pre_conversion_lowering.mlir

@@ -5,7 +5,7 @@
 // CHECK-LABEL: func @f
 func @f(%arg0: tensor<3x4xf32>, %arg1: tensor<4x5xf32>) -> tensor<3x5xf32> {
   // CHECK: vmla.batch.matmul
-  %0 = "xla_hlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = {
+  %0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = {
     lhs_batching_dimensions = dense<[]> : tensor<0xi64>,
     lhs_contracting_dimensions = dense<[1]> : tensor<1xi64>,
     rhs_batching_dimensions = dense<[]> : tensor<0xi64>,
@@ -19,7 +19,7 @@
 // CHECK-LABEL: func @f
 func @f(%arg0: tensor<3xf32>) -> tensor<4x3xf32> {
   // CHECK: "shapex.ranked_broadcast_in_dim"(%arg0, %rs4_3)
-  %0 = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[1]> : tensor<1xi64>} : (tensor<3xf32>) -> tensor<4x3xf32>
+  %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[1]> : tensor<1xi64>} : (tensor<3xf32>) -> tensor<4x3xf32>
   return %0 : tensor<4x3xf32>
 }
 
@@ -28,6 +28,6 @@
 // CHECK-LABEL: func @f
 func @f(%arg0: tensor<3xf32>) -> tensor<5x6x3xf32> {
   // CHECK: "shapex.ranked_broadcast_in_dim"(%arg0, %rs5_6_3)
-  %0 = "xla_hlo.broadcast"(%arg0) {broadcast_sizes = dense<[5, 6]> : tensor<2xi64>} : (tensor<3xf32>) -> tensor<5x6x3xf32>
+  %0 = "mhlo.broadcast"(%arg0) {broadcast_sizes = dense<[5, 6]> : tensor<2xi64>} : (tensor<3xf32>) -> tensor<5x6x3xf32>
   return %0 : tensor<5x6x3xf32>
 }

diff --git a/iree/compiler/Dialect/VMLA/Transforms/test/transformation.mlir b/iree/compiler/Dialect/VMLA/Transforms/test/transformation.mlir
index 62cc55b..058c3c7 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/test/transformation.mlir
+++ b/iree/compiler/Dialect/VMLA/Transforms/test/transformation.mlir

@@ -8,7 +8,7 @@
   return
 }
 func @simpleMath_rgn_dispatch_0_impl(%arg0: tensor<4xf32>) -> tensor<4xf32> attributes {sym_visibility = "private"} {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
   return %0 : tensor<4xf32>
 }
 hal.interface @legacy_io attributes {sym_visibility = "private"} {

diff --git a/iree/compiler/Dialect/VMLA/Transforms/test/unroll_reductions.mlir b/iree/compiler/Dialect/VMLA/Transforms/test/unroll_reductions.mlir
index 39d58fe..8b89122 100644
--- a/iree/compiler/Dialect/VMLA/Transforms/test/unroll_reductions.mlir
+++ b/iree/compiler/Dialect/VMLA/Transforms/test/unroll_reductions.mlir

@@ -4,20 +4,20 @@
 func @unrolled_reduction(%arg0: tensor<4x2x8xf32>) -> tensor<4xf32> {
   // CHECK-DAG: %[[INITIAL:.+]] = constant dense<0.000000e+00> : tensor<f32>
   %cst = constant dense<0.000000e+00> : tensor<f32>
-  // CHECK-NEXT: %[[TEMP:.+]] = "xla_hlo.reduce"(%arg0, %[[INITIAL]]) ( {
+  // CHECK-NEXT: %[[TEMP:.+]] = "mhlo.reduce"(%arg0, %[[INITIAL]]) ( {
   // CHECK-NEXT: ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-  // CHECK-NEXT:   %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-  // CHECK-NEXT:   "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+  // CHECK-NEXT:   %2 = mhlo.add %arg1, %arg2 : tensor<f32>
+  // CHECK-NEXT:   "mhlo.return"(%2) : (tensor<f32>) -> ()
   // CHECK-NEXT: }) {dimensions = dense<2> : tensor<1xi64>} : (tensor<4x2x8xf32>, tensor<f32>) -> tensor<4x2xf32>
-  // CHECK-NEXT: %[[RESULT:.+]] = "xla_hlo.reduce"(%[[TEMP]], %[[INITIAL]]) ( {
+  // CHECK-NEXT: %[[RESULT:.+]] = "mhlo.reduce"(%[[TEMP]], %[[INITIAL]]) ( {
   // CHECK-NEXT: ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-  // CHECK-NEXT:   %2 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-  // CHECK-NEXT:   "xla_hlo.return"(%2) : (tensor<f32>) -> ()
+  // CHECK-NEXT:   %2 = mhlo.add %arg1, %arg2 : tensor<f32>
+  // CHECK-NEXT:   "mhlo.return"(%2) : (tensor<f32>) -> ()
   // CHECK-NEXT: }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x2xf32>, tensor<f32>) -> tensor<4xf32>
-  %0 = "xla_hlo.reduce"(%arg0, %cst) ( {
+  %0 = "mhlo.reduce"(%arg0, %cst) ( {
   ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>): // no predecessors
-    %1 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-    "xla_hlo.return"(%1) : (tensor<f32>) -> ()
+    %1 = mhlo.add %arg1, %arg2 : tensor<f32>
+    "mhlo.return"(%1) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<4x2x8xf32>, tensor<f32>) -> tensor<4xf32>
   // CHECK-NEXT: return %[[RESULT]]
   return %0 : tensor<4xf32>

diff --git a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
index a29a802..2094c02 100644
--- a/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir
+++ b/iree/compiler/Dialect/Vulkan/Utils/test/target_env_conversion.mlir

@@ -13,7 +13,7 @@
   }
   module {
     func @simpleMath_rgn_dispatch_0(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-      %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
+      %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
       return %0 : tensor<4xf32>
     }
   }

diff --git a/iree/compiler/Translation/test/do_not_optimize.mlir b/iree/compiler/Translation/test/do_not_optimize.mlir
index 52561df..4a5d367 100644
--- a/iree/compiler/Translation/test/do_not_optimize.mlir
+++ b/iree/compiler/Translation/test/do_not_optimize.mlir

@@ -50,6 +50,6 @@
 func @dynamic_constant() -> tensor<?x?xf32> {
   // CHECK: vm.call @hal.buffer_view.dim
   %input = iree.dynamic_shape_constant dense<3.0> : tensor<2x3xf32> -> tensor<?x?xf32>
-  %res = "xla_hlo.abs"(%input) : (tensor<?x?xf32>) -> tensor<?x?xf32>
+  %res = "mhlo.abs"(%input) : (tensor<?x?xf32>) -> tensor<?x?xf32>
   return %res : tensor<?x?xf32>
 }

diff --git a/iree/compiler/Translation/test/smoketest.mlir b/iree/compiler/Translation/test/smoketest.mlir
index e7d14f4..69a8d97 100644
--- a/iree/compiler/Translation/test/smoketest.mlir
+++ b/iree/compiler/Translation/test/smoketest.mlir

@@ -43,9 +43,9 @@
 // CHECK: exported_functions:
 // CHECK: local_name: "hloElementwiseOps"
 func @hloElementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> attributes {iree.module.export} {
-  %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
-  %1 = xla_hlo.subtract %0, %arg0 : tensor<4xf32>
-  %2 = xla_hlo.multiply %1, %arg0 : tensor<4xf32>
+  %0 = mhlo.add %arg0, %arg0 : tensor<4xf32>
+  %1 = mhlo.subtract %0, %arg0 : tensor<4xf32>
+  %2 = mhlo.multiply %1, %arg0 : tensor<4xf32>
   return %2 : tensor<4xf32>
 }
 }

diff --git a/iree/modules/check/test/success.mlir b/iree/modules/check/test/success.mlir
index 7904dbb..bf17af4 100644
--- a/iree/modules/check/test/success.mlir
+++ b/iree/modules/check/test/success.mlir

@@ -55,7 +55,7 @@
 
 func @add() attributes { iree.module.export } {
   %c5 = iree.unfoldable_constant dense<5> : tensor<i32>
-  %result = "xla_hlo.add"(%c5, %c5) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.add"(%c5, %c5) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   %c10 = iree.unfoldable_constant dense<10> : tensor<i32>
   check.expect_eq(%result, %c10) : tensor<i32>
   return
@@ -64,15 +64,15 @@
 func @floats() attributes { iree.module.export } {
   %cp1 = iree.unfoldable_constant dense<0.1> : tensor<f32>
   %c1 = iree.unfoldable_constant dense<1.0> : tensor<f32>
-  %p2 = "xla_hlo.add"(%cp1, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p3 = "xla_hlo.add"(%p2, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p4 = "xla_hlo.add"(%p3, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p5 = "xla_hlo.add"(%p4, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p6 = "xla_hlo.add"(%p5, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p7 = "xla_hlo.add"(%p6, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p8 = "xla_hlo.add"(%p7, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %p9 = "xla_hlo.add"(%p8, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  %approximately_1 = "xla_hlo.add"(%p9, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p2 = "mhlo.add"(%cp1, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p3 = "mhlo.add"(%p2, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p4 = "mhlo.add"(%p3, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p5 = "mhlo.add"(%p4, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p6 = "mhlo.add"(%p5, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p7 = "mhlo.add"(%p6, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p8 = "mhlo.add"(%p7, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %p9 = "mhlo.add"(%p8, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %approximately_1 = "mhlo.add"(%p9, %cp1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 
   check.expect_almost_eq(%approximately_1, %c1) : tensor<f32>
   return

diff --git a/iree/samples/models/mnist.mlir b/iree/samples/models/mnist.mlir
index a78c669..53b6cb8 100644
--- a/iree/samples/models/mnist.mlir
+++ b/iree/samples/models/mnist.mlir

@@ -21,31 +21,31 @@
     %1 = flow.variable.address @"__iree_flow___sm_node16__model.layer-2.bias" : !iree.ptr<tensor<128xf32>>
     %2 = flow.variable.address @"__iree_flow___sm_node21__model.layer-3.kernel" : !iree.ptr<tensor<128x10xf32>>
     %3 = flow.variable.address @"__iree_flow___sm_node22__model.layer-3.bias" : !iree.ptr<tensor<10xf32>>
-    %4 = xla_hlo.constant dense<0xFF800000> : tensor<f32>
-    %5 = xla_hlo.constant dense<0.000000e+00> : tensor<f32>
+    %4 = mhlo.constant dense<0xFF800000> : tensor<f32>
+    %5 = mhlo.constant dense<0.000000e+00> : tensor<f32>
     %6 = flow.variable.load.indirect %3 : !iree.ptr<tensor<10xf32>> -> tensor<10xf32>
     %7 = flow.variable.load.indirect %2 : !iree.ptr<tensor<128x10xf32>> -> tensor<128x10xf32>
     %8 = flow.variable.load.indirect %1 : !iree.ptr<tensor<128xf32>> -> tensor<128xf32>
     %9 = flow.variable.load.indirect %0 : !iree.ptr<tensor<784x128xf32>> -> tensor<784x128xf32>
-    %10 = "xla_hlo.reshape"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
-    %11 = "xla_hlo.dot"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
-    %12 = "xla_hlo.add"(%11, %8) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x128xf32>, tensor<128xf32>) -> tensor<1x128xf32>
-    %13 = "xla_hlo.maximum"(%12, %5) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<1x128xf32>, tensor<f32>) -> tensor<1x128xf32>
-    %14 = "xla_hlo.dot"(%13, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
-    %15 = "xla_hlo.add"(%14, %6) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<10xf32>) -> tensor<1x10xf32>
-    %16 = "xla_hlo.reduce"(%15, %4) ( {
+    %10 = "mhlo.reshape"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
+    %11 = "mhlo.dot"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
+    %12 = "mhlo.add"(%11, %8) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x128xf32>, tensor<128xf32>) -> tensor<1x128xf32>
+    %13 = "mhlo.maximum"(%12, %5) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<1x128xf32>, tensor<f32>) -> tensor<1x128xf32>
+    %14 = "mhlo.dot"(%13, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
+    %15 = "mhlo.add"(%14, %6) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<10xf32>) -> tensor<1x10xf32>
+    %16 = "mhlo.reduce"(%15, %4) ( {
     ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-      %21 = xla_hlo.maximum %arg1, %arg2 : tensor<f32>
-      "xla_hlo.return"(%21) : (tensor<f32>) -> ()
+      %21 = mhlo.maximum %arg1, %arg2 : tensor<f32>
+      "mhlo.return"(%21) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %17 = "xla_hlo.subtract"(%15, %16) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<1xf32>) -> tensor<1x10xf32>
-    %18 = "xla_hlo.exponential"(%17) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %19 = "xla_hlo.reduce"(%18, %5) ( {
+    %17 = "mhlo.subtract"(%15, %16) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<1xf32>) -> tensor<1x10xf32>
+    %18 = "mhlo.exponential"(%17) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %19 = "mhlo.reduce"(%18, %5) ( {
     ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-      %21 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-      "xla_hlo.return"(%21) : (tensor<f32>) -> ()
+      %21 = mhlo.add %arg1, %arg2 : tensor<f32>
+      "mhlo.return"(%21) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %20 = "xla_hlo.divide"(%18, %19) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<1xf32>) -> tensor<1x10xf32>
+    %20 = "mhlo.divide"(%18, %19) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<1xf32>) -> tensor<1x10xf32>
     return %20 : tensor<1x10xf32>
   }
 }

diff --git a/iree/samples/simple_embedding/simple_embedding_test.mlir b/iree/samples/simple_embedding/simple_embedding_test.mlir
index 2839639..7966f82 100644
--- a/iree/samples/simple_embedding/simple_embedding_test.mlir
+++ b/iree/samples/simple_embedding/simple_embedding_test.mlir

@@ -1,5 +1,5 @@
 func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
     attributes { iree.module.export } {
-  %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   return %0 : tensor<4xf32>
 }

diff --git a/iree/samples/vulkan/simple_mul.mlir b/iree/samples/vulkan/simple_mul.mlir
index 2839639..7966f82 100644
--- a/iree/samples/vulkan/simple_mul.mlir
+++ b/iree/samples/vulkan/simple_mul.mlir

@@ -1,5 +1,5 @@
 func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
     attributes { iree.module.export } {
-  %0 = "xla_hlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   return %0 : tensor<4xf32>
 }

diff --git a/iree/test/e2e/models/collatz.mlir b/iree/test/e2e/models/collatz.mlir
index 9cd91c7..53767f1 100644
--- a/iree/test/e2e/models/collatz.mlir
+++ b/iree/test/e2e/models/collatz.mlir

@@ -3,34 +3,34 @@
 // CHECK-LABEL: EXEC @collatz
 func @collatz() -> tensor<f32> {
   %arg0 = iree.unfoldable_constant dense<178.0> : tensor<f32>
-  %0 = xla_hlo.constant dense<1.0> : tensor<f32>
-  %1 = xla_hlo.constant dense<3.0> : tensor<f32>
-  %2 = xla_hlo.constant dense<2.0> : tensor<f32>
-  %3 = xla_hlo.constant dense<0.0> : tensor<f32>
+  %0 = mhlo.constant dense<1.0> : tensor<f32>
+  %1 = mhlo.constant dense<3.0> : tensor<f32>
+  %2 = mhlo.constant dense<2.0> : tensor<f32>
+  %3 = mhlo.constant dense<0.0> : tensor<f32>
   br ^bb1(%3, %arg0 : tensor<f32>, tensor<f32>)
 ^bb1(%4: tensor<f32>, %5: tensor<f32>):
-  %6 = "xla_hlo.compare"(%5, %0) {comparison_direction = "GT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %6 = "mhlo.compare"(%5, %0) {comparison_direction = "GT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
   %7 = extract_element %6[] : tensor<i1>
   cond_br %7, ^bb2(%4, %5 : tensor<f32>, tensor<f32>), ^bb6(%4 : tensor<f32>)
 ^bb2(%8: tensor<f32>, %9: tensor<f32>):
-  %10 = xla_hlo.add %8, %0 : tensor<f32>
-  %11 = xla_hlo.remainder %9, %2 : tensor<f32>
-  %12 = "xla_hlo.compare"(%11, %3) {comparison_direction = "NE"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
-  %13 = "xla_hlo.compare"(%2, %3) {comparison_direction = "LT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
-  %14 = "xla_hlo.compare"(%11, %3) {comparison_direction = "LT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
-  %15 = "xla_hlo.compare"(%13, %14) {comparison_direction = "NE"} : (tensor<i1>, tensor<i1>) -> tensor<i1>
-  %16 = xla_hlo.and %12, %15 : tensor<i1>
-  %17 = xla_hlo.add %11, %2 : tensor<f32>
-  %18 = "xla_hlo.select"(%16, %17, %11) : (tensor<i1>, tensor<f32>, tensor<f32>) -> tensor<f32>
-  %19 = "xla_hlo.compare"(%18, %3) {comparison_direction = "GT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %10 = mhlo.add %8, %0 : tensor<f32>
+  %11 = mhlo.remainder %9, %2 : tensor<f32>
+  %12 = "mhlo.compare"(%11, %3) {comparison_direction = "NE"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %13 = "mhlo.compare"(%2, %3) {comparison_direction = "LT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %14 = "mhlo.compare"(%11, %3) {comparison_direction = "LT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %15 = "mhlo.compare"(%13, %14) {comparison_direction = "NE"} : (tensor<i1>, tensor<i1>) -> tensor<i1>
+  %16 = mhlo.and %12, %15 : tensor<i1>
+  %17 = mhlo.add %11, %2 : tensor<f32>
+  %18 = "mhlo.select"(%16, %17, %11) : (tensor<i1>, tensor<f32>, tensor<f32>) -> tensor<f32>
+  %19 = "mhlo.compare"(%18, %3) {comparison_direction = "GT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
   %20 = extract_element %19[] : tensor<i1>
   cond_br %20, ^bb3, ^bb4
 ^bb3: // pred: ^bb2
-  %21 = xla_hlo.multiply %9, %1 : tensor<f32>
-  %22 = xla_hlo.add %21, %0 : tensor<f32>
+  %21 = mhlo.multiply %9, %1 : tensor<f32>
+  %22 = mhlo.add %21, %0 : tensor<f32>
   br ^bb5(%22 : tensor<f32>)
 ^bb4: // pred: ^bb2
-  %23 = xla_hlo.divide %9, %2 : tensor<f32>
+  %23 = mhlo.divide %9, %2 : tensor<f32>
   br ^bb5(%23 : tensor<f32>)
 ^bb5(%24: tensor<f32>): // 2 preds: ^bb3, ^bb4
   br ^bb1(%10, %24 : tensor<f32>, tensor<f32>)

diff --git a/iree/test/e2e/models/edge_detection.mlir b/iree/test/e2e/models/edge_detection.mlir
index 9110a10..857a0ee 100644
--- a/iree/test/e2e/models/edge_detection.mlir
+++ b/iree/test/e2e/models/edge_detection.mlir

@@ -10,14 +10,14 @@
 module {
   // CHECK-LABEL: EXEC @edge_detect_sobel_operator
   func @edge_detect_sobel_operator(%arg0: tensor<1x128x128x1xf32> {tf_saved_model.index_path = [0]}) -> (tensor<1x128x128x1xf32> {tf_saved_model.index_path = []}) attributes {iree.module.export, tf._input_shapes = ["tfshape$dim { size: 1 } dim { size: 128 } dim { size: 128 } dim { size: 1 }"]} {
-    %0 = xla_hlo.constant dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]> : tensor<3x3x1x1xf32>
-    %1 = xla_hlo.constant dense<[[[[1.000000e+00]], [[2.000000e+00]], [[1.000000e+00]]], [[[0.000000e+00]], [[0.000000e+00]], [[0.000000e+00]]], [[[-1.000000e+00]], [[-2.000000e+00]], [[-1.000000e+00]]]]> : tensor<3x3x1x1xf32>
-    %2 = "xla_hlo.convolution"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
-    %3 = xla_hlo.multiply %2, %2 : tensor<1x128x128x1xf32>
-    %4 = "xla_hlo.convolution"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
-    %5 = xla_hlo.multiply %4, %4 : tensor<1x128x128x1xf32>
-    %6 = xla_hlo.add %3, %5 : tensor<1x128x128x1xf32>
-    %7 = "xla_hlo.sqrt"(%6) : (tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32>
+    %0 = mhlo.constant dense<[[[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]], [[[-2.000000e+00]], [[0.000000e+00]], [[2.000000e+00]]], [[[-1.000000e+00]], [[0.000000e+00]], [[1.000000e+00]]]]> : tensor<3x3x1x1xf32>
+    %1 = mhlo.constant dense<[[[[1.000000e+00]], [[2.000000e+00]], [[1.000000e+00]]], [[[0.000000e+00]], [[0.000000e+00]], [[0.000000e+00]]], [[[-1.000000e+00]], [[-2.000000e+00]], [[-1.000000e+00]]]]> : tensor<3x3x1x1xf32>
+    %2 = "mhlo.convolution"(%arg0, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
+    %3 = mhlo.multiply %2, %2 : tensor<1x128x128x1xf32>
+    %4 = "mhlo.convolution"(%arg0, %1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<1> : tensor<2x2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>} : (tensor<1x128x128x1xf32>, tensor<3x3x1x1xf32>) -> tensor<1x128x128x1xf32>
+    %5 = mhlo.multiply %4, %4 : tensor<1x128x128x1xf32>
+    %6 = mhlo.add %3, %5 : tensor<1x128x128x1xf32>
+    %7 = "mhlo.sqrt"(%6) : (tensor<1x128x128x1xf32>) -> tensor<1x128x128x1xf32>
     return %7 : tensor<1x128x128x1xf32>
   }
   // CHECK: 1x128x128x1xf32=

diff --git a/iree/test/e2e/models/fragment_000.mlir b/iree/test/e2e/models/fragment_000.mlir
index acac1e4..c5b7d8a 100644
--- a/iree/test/e2e/models/fragment_000.mlir
+++ b/iree/test/e2e/models/fragment_000.mlir

@@ -9,20 +9,20 @@
     %2: tensor<f32>,
     %3: tensor<5x5xf32>,
     %4: tensor<5xf32>) -> tensor<5x5xf32> {
-  %5 = "xla_hlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.44"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
-  %6 = "xla_hlo.broadcast_in_dim"(%2) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.9"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %7 = xla_hlo.multiply %5, %6 : tensor<5x1x5xf32>
-  %8 = "xla_hlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.47"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %9 = "xla_hlo.compare"(%7, %8) {comparison_direction = "GT"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
-  %10 = "xla_hlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.11"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %11 = "xla_hlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.67"} : (tensor<f32>) -> tensor<5x5xf32>
-  %12 = "xla_hlo.broadcast_in_dim"(%4) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.64"} : (tensor<5xf32>) -> tensor<5x5xf32>
-  %13 = xla_hlo.add %3, %12 : tensor<5x5xf32>
-  %14 = xla_hlo.maximum %11, %13 {name = "maximum.68"} : tensor<5x5xf32>
-  %15 = "xla_hlo.reshape"(%14) {name = "reshape.70"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
-  %16 = "xla_hlo.select"(%9, %10, %15) {name = "select.71"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %17 = "xla_hlo.copy"(%16) {name = "copy.4"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %18 = "xla_hlo.reshape"(%17) {name = "reshape.72"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
+  %5 = "mhlo.broadcast_in_dim"(%1) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.44"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
+  %6 = "mhlo.broadcast_in_dim"(%2) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.9"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %7 = mhlo.multiply %5, %6 : tensor<5x1x5xf32>
+  %8 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.47"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %9 = "mhlo.compare"(%7, %8) {comparison_direction = "GT"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
+  %10 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.11"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %11 = "mhlo.broadcast_in_dim"(%0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.67"} : (tensor<f32>) -> tensor<5x5xf32>
+  %12 = "mhlo.broadcast_in_dim"(%4) {broadcast_dimensions = dense<1> : tensor<1xi64>, name = "broadcast.64"} : (tensor<5xf32>) -> tensor<5x5xf32>
+  %13 = mhlo.add %3, %12 : tensor<5x5xf32>
+  %14 = mhlo.maximum %11, %13 {name = "maximum.68"} : tensor<5x5xf32>
+  %15 = "mhlo.reshape"(%14) {name = "reshape.70"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
+  %16 = "mhlo.select"(%9, %10, %15) {name = "select.71"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %17 = "mhlo.copy"(%16) {name = "copy.4"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %18 = "mhlo.reshape"(%17) {name = "reshape.72"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
   return %18 : tensor<5x5xf32>
 }
 

diff --git a/iree/test/e2e/models/fullyconnected.mlir b/iree/test/e2e/models/fullyconnected.mlir
index 628be3a..463a5f5 100644
--- a/iree/test/e2e/models/fullyconnected.mlir
+++ b/iree/test/e2e/models/fullyconnected.mlir

@@ -5,80 +5,80 @@
 // CHECK-LABEL: EXEC @main
 func @main(%arg0: tensor<1x5xf32>, %arg1: tensor<1x5x3x1xf32>) -> tuple<tensor<5x1x5xf32>>
   attributes {iree.module.export} {
-  %0 = "xla_hlo.reshape"(%arg0) {name = "reshape.3"} : (tensor<1x5xf32>) -> tensor<1x5xf32>
-  %1 = "xla_hlo.transpose"(%0) {name = "transpose.41", permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
-  %2 = "xla_hlo.reshape"(%1) {name = "reshape.42"} : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
-  %3 = "xla_hlo.reshape"(%2) {name = "reshape.55"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
-  %4 = "xla_hlo.broadcast_in_dim"(%3) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.56"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
+  %0 = "mhlo.reshape"(%arg0) {name = "reshape.3"} : (tensor<1x5xf32>) -> tensor<1x5xf32>
+  %1 = "mhlo.transpose"(%0) {name = "transpose.41", permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
+  %2 = "mhlo.reshape"(%1) {name = "reshape.42"} : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
+  %3 = "mhlo.reshape"(%2) {name = "reshape.55"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
+  %4 = "mhlo.broadcast_in_dim"(%3) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.56"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
   %cst = constant  {name = "constant.22"} dense<1.000000e+00> : tensor<f32>
-  %5 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.23"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %6 = xla_hlo.multiply %4, %5 {name = "multiply.57"} : tensor<5x1x5xf32>
+  %5 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.23"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %6 = mhlo.multiply %4, %5 {name = "multiply.57"} : tensor<5x1x5xf32>
   %cst_0 = constant  {name = "constant.58"} dense<0.000000e+00> : tensor<f32>
-  %7 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.59"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %8 = "xla_hlo.compare"(%6, %7) {comparison_direction = "GT", name = "compare.60"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
+  %7 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.59"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %8 = "mhlo.compare"(%6, %7) {comparison_direction = "GT", name = "compare.60"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
   %cst_1 = constant  {name = "constant.24"} dense<0.000000e+00> : tensor<f32>
-  %9 = "xla_hlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.25"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %9 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.25"} : (tensor<f32>) -> tensor<5x1x5xf32>
   %cst_2 = constant  {name = "constant.90"} dense<0.000000e+00> : tensor<f32>
-  %10 = "xla_hlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.91"} : (tensor<f32>) -> tensor<5x5xf32>
-  %11 = "xla_hlo.reshape"(%2) {name = "reshape.49"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
-  %12 = "xla_hlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.50"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
+  %10 = "mhlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.91"} : (tensor<f32>) -> tensor<5x5xf32>
+  %11 = "mhlo.reshape"(%2) {name = "reshape.49"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
+  %12 = "mhlo.broadcast_in_dim"(%11) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.50"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
   %cst_3 = constant  {name = "constant.15"} dense<1.000000e+00> : tensor<f32>
-  %13 = "xla_hlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.16"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %14 = xla_hlo.multiply %12, %13 {name = "multiply.51"} : tensor<5x1x5xf32>
+  %13 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.16"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %14 = mhlo.multiply %12, %13 {name = "multiply.51"} : tensor<5x1x5xf32>
   %cst_4 = constant  {name = "constant.52"} dense<0.000000e+00> : tensor<f32>
-  %15 = "xla_hlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.53"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %16 = "xla_hlo.compare"(%14, %15) {comparison_direction = "GT", name = "compare.54"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
+  %15 = "mhlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.53"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %16 = "mhlo.compare"(%14, %15) {comparison_direction = "GT", name = "compare.54"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
   %cst_5 = constant  {name = "constant.17"} dense<0.000000e+00> : tensor<f32>
-  %17 = "xla_hlo.broadcast_in_dim"(%cst_5) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.18"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %17 = "mhlo.broadcast_in_dim"(%cst_5) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.18"} : (tensor<f32>) -> tensor<5x1x5xf32>
   %cst_6 = constant  {name = "constant.78"} dense<0.000000e+00> : tensor<f32>
-  %18 = "xla_hlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.79"} : (tensor<f32>) -> tensor<5x5xf32>
-  %19 = "xla_hlo.reshape"(%2) {name = "reshape.43"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
-  %20 = "xla_hlo.broadcast_in_dim"(%19) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.44"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
+  %18 = "mhlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.79"} : (tensor<f32>) -> tensor<5x5xf32>
+  %19 = "mhlo.reshape"(%2) {name = "reshape.43"} : (tensor<5x1x1xf32>) -> tensor<5x1xf32>
+  %20 = "mhlo.broadcast_in_dim"(%19) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>, name = "broadcast.44"} : (tensor<5x1xf32>) -> tensor<5x1x5xf32>
   %cst_7 = constant  {name = "constant.8"} dense<1.000000e+00> : tensor<f32>
-  %21 = "xla_hlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.9"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %22 = xla_hlo.multiply %20, %21 {name = "multiply.45"} : tensor<5x1x5xf32>
+  %21 = "mhlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.9"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %22 = mhlo.multiply %20, %21 {name = "multiply.45"} : tensor<5x1x5xf32>
   %cst_8 = constant  {name = "constant.46"} dense<0.000000e+00> : tensor<f32>
-  %23 = "xla_hlo.broadcast_in_dim"(%cst_8) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.47"} : (tensor<f32>) -> tensor<5x1x5xf32>
-  %24 = "xla_hlo.compare"(%22, %23) {comparison_direction = "GT", name = "compare.48"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
+  %23 = "mhlo.broadcast_in_dim"(%cst_8) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.47"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %24 = "mhlo.compare"(%22, %23) {comparison_direction = "GT", name = "compare.48"} : (tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xi1>
   %cst_9 = constant  {name = "constant.10"} dense<0.000000e+00> : tensor<f32>
-  %25 = "xla_hlo.broadcast_in_dim"(%cst_9) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.11"} : (tensor<f32>) -> tensor<5x1x5xf32>
+  %25 = "mhlo.broadcast_in_dim"(%cst_9) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.11"} : (tensor<f32>) -> tensor<5x1x5xf32>
   %cst_10 = constant  {name = "constant.66"} dense<0.000000e+00> : tensor<f32>
-  %26 = "xla_hlo.broadcast_in_dim"(%cst_10) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.67"} : (tensor<f32>) -> tensor<5x5xf32>
-  %27 = "xla_hlo.copy"(%arg1) {name = "copy.3"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3x1xf32>
-  %28 = "xla_hlo.reshape"(%27) {name = "reshape.4"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3x1xf32>
-  %29 = "xla_hlo.reshape"(%28) {name = "reshape.38"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3xf32>
-  %30 = "xla_hlo.transpose"(%29) {name = "transpose.39", permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x3xf32>) -> tensor<5x1x3xf32>
-  %31 = "xla_hlo.reshape"(%30) {name = "reshape.40"} : (tensor<5x1x3xf32>) -> tensor<5x3xf32>
+  %26 = "mhlo.broadcast_in_dim"(%cst_10) {broadcast_dimensions = dense<[]> : tensor<0xi64>, name = "broadcast.67"} : (tensor<f32>) -> tensor<5x5xf32>
+  %27 = "mhlo.copy"(%arg1) {name = "copy.3"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3x1xf32>
+  %28 = "mhlo.reshape"(%27) {name = "reshape.4"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3x1xf32>
+  %29 = "mhlo.reshape"(%28) {name = "reshape.38"} : (tensor<1x5x3x1xf32>) -> tensor<1x5x3xf32>
+  %30 = "mhlo.transpose"(%29) {name = "transpose.39", permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x3xf32>) -> tensor<5x1x3xf32>
+  %31 = "mhlo.reshape"(%30) {name = "reshape.40"} : (tensor<5x1x3xf32>) -> tensor<5x3xf32>
   %cst_11 = constant  {name = "constant.61"} dense<[[0.706495285, -0.567672312, 0.483717591, 0.522725761, 0.7563259], [-0.0899272263, -0.283501834, -0.350822538, -0.351515919, -0.337136656], [-0.451804549, 0.372324884, -0.620518147, 0.235451385, 0.851095855]]> : tensor<3x5xf32>
-  %32 = "xla_hlo.dot"(%31, %cst_11) {name = "dot.62", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
+  %32 = "mhlo.dot"(%31, %cst_11) {name = "dot.62", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
   %cst_12 = constant  {name = "constant.63"} dense<[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]> : tensor<5xf32>
-  %33 = "xla_hlo.broadcast_in_dim"(%cst_12) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.64"} : (tensor<5xf32>) -> tensor<5x5xf32>
-  %34 = xla_hlo.add %32, %33 {name = "add.65"} : tensor<5x5xf32>
-  %35 = xla_hlo.maximum %26, %34 {name = "maximum.68"} : tensor<5x5xf32>
-  %36 = "xla_hlo.reshape"(%35) {name = "reshape.70"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
-  %37 = "xla_hlo.select"(%24, %25, %36) {name = "select.71"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %38 = "xla_hlo.copy"(%37) {name = "copy.4"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %39 = "xla_hlo.reshape"(%38) {name = "reshape.72"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
+  %33 = "mhlo.broadcast_in_dim"(%cst_12) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.64"} : (tensor<5xf32>) -> tensor<5x5xf32>
+  %34 = mhlo.add %32, %33 {name = "add.65"} : tensor<5x5xf32>
+  %35 = mhlo.maximum %26, %34 {name = "maximum.68"} : tensor<5x5xf32>
+  %36 = "mhlo.reshape"(%35) {name = "reshape.70"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
+  %37 = "mhlo.select"(%24, %25, %36) {name = "select.71"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %38 = "mhlo.copy"(%37) {name = "copy.4"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %39 = "mhlo.reshape"(%38) {name = "reshape.72"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
   %cst_13 = constant  {name = "constant.73"} dense<[[-0.0118641369, -3.785000e-02, 0.489048243, 0.321015775, -0.702280283], [-0.280262798, -0.724645615, -0.00332254497, 0.392334729, 0.619746447], [-0.113318317, -0.180415511, -0.146743968, 0.250408649, -0.442881733], [0.115600757, 0.703136146, -0.00812680274, -0.225454301, -0.0835619792], [-0.136745885, -6.298570e-01, 0.43629986, -0.689790308, 0.230725273]]> : tensor<5x5xf32>
-  %40 = "xla_hlo.dot"(%39, %cst_13) {name = "dot.74", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x5xf32>, tensor<5x5xf32>) -> tensor<5x5xf32>
+  %40 = "mhlo.dot"(%39, %cst_13) {name = "dot.74", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x5xf32>, tensor<5x5xf32>) -> tensor<5x5xf32>
   %cst_14 = constant  {name = "constant.75"} dense<[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]> : tensor<5xf32>
-  %41 = "xla_hlo.broadcast_in_dim"(%cst_14) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.76"} : (tensor<5xf32>) -> tensor<5x5xf32>
-  %42 = xla_hlo.add %40, %41 {name = "add.77"} : tensor<5x5xf32>
-  %43 = xla_hlo.maximum %18, %42 {name = "maximum.80"} : tensor<5x5xf32>
-  %44 = "xla_hlo.reshape"(%43) {name = "reshape.82"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
-  %45 = "xla_hlo.select"(%16, %17, %44) {name = "select.83"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %46 = "xla_hlo.copy"(%45) {name = "copy.5"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %47 = "xla_hlo.reshape"(%46) {name = "reshape.84"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
+  %41 = "mhlo.broadcast_in_dim"(%cst_14) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.76"} : (tensor<5xf32>) -> tensor<5x5xf32>
+  %42 = mhlo.add %40, %41 {name = "add.77"} : tensor<5x5xf32>
+  %43 = mhlo.maximum %18, %42 {name = "maximum.80"} : tensor<5x5xf32>
+  %44 = "mhlo.reshape"(%43) {name = "reshape.82"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
+  %45 = "mhlo.select"(%16, %17, %44) {name = "select.83"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %46 = "mhlo.copy"(%45) {name = "copy.5"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %47 = "mhlo.reshape"(%46) {name = "reshape.84"} : (tensor<5x1x5xf32>) -> tensor<5x5xf32>
   %cst_15 = constant  {name = "constant.85"} dense<[[-0.136191264, -0.0401721969, 0.38497138, -5.850760e-01, 0.370910525], [-0.391011149, 0.0266356133, 0.309115469, -0.205079094, -0.559861302], [0.497760415, 0.689488232, 0.0759292394, -0.33134672, -0.237128958], [-0.53243047, 0.476418108, -0.371978909, 0.283265263, 0.63842845], [0.101761498, -0.218626946, 0.475128263, 0.042601984, 0.0988005772]]> : tensor<5x5xf32>
-  %48 = "xla_hlo.dot"(%47, %cst_15) {name = "dot.86", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x5xf32>, tensor<5x5xf32>) -> tensor<5x5xf32>
+  %48 = "mhlo.dot"(%47, %cst_15) {name = "dot.86", precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x5xf32>, tensor<5x5xf32>) -> tensor<5x5xf32>
   %cst_16 = constant  {name = "constant.87"} dense<[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00]> : tensor<5xf32>
-  %49 = "xla_hlo.broadcast_in_dim"(%cst_16) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.88"} : (tensor<5xf32>) -> tensor<5x5xf32>
-  %50 = xla_hlo.add %48, %49 {name = "add.89"} : tensor<5x5xf32>
-  %51 = xla_hlo.maximum %10, %50 {name = "maximum.92"} : tensor<5x5xf32>
-  %52 = "xla_hlo.reshape"(%51) {name = "reshape.94"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
-  %53 = "xla_hlo.select"(%8, %9, %52) {name = "select.95"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %54 = "xla_hlo.reshape"(%53) {name = "reshape.96"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
-  %55 = "xla_hlo.tuple"(%54) {name = "tuple.97"} : (tensor<5x1x5xf32>) -> tuple<tensor<5x1x5xf32>>
+  %49 = "mhlo.broadcast_in_dim"(%cst_16) {broadcast_dimensions = dense<[1]> : tensor<1xi64>, name = "broadcast.88"} : (tensor<5xf32>) -> tensor<5x5xf32>
+  %50 = mhlo.add %48, %49 {name = "add.89"} : tensor<5x5xf32>
+  %51 = mhlo.maximum %10, %50 {name = "maximum.92"} : tensor<5x5xf32>
+  %52 = "mhlo.reshape"(%51) {name = "reshape.94"} : (tensor<5x5xf32>) -> tensor<5x1x5xf32>
+  %53 = "mhlo.select"(%8, %9, %52) {name = "select.95"} : (tensor<5x1x5xi1>, tensor<5x1x5xf32>, tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %54 = "mhlo.reshape"(%53) {name = "reshape.96"} : (tensor<5x1x5xf32>) -> tensor<5x1x5xf32>
+  %55 = "mhlo.tuple"(%54) {name = "tuple.97"} : (tensor<5x1x5xf32>) -> tuple<tensor<5x1x5xf32>>
   return %55 : tuple<tensor<5x1x5xf32>>
 }
 

diff --git a/iree/test/e2e/models/mnist_fake_weights.mlir b/iree/test/e2e/models/mnist_fake_weights.mlir
index 161a9c9..e0dfd64 100644
--- a/iree/test/e2e/models/mnist_fake_weights.mlir
+++ b/iree/test/e2e/models/mnist_fake_weights.mlir

@@ -15,33 +15,33 @@
     %1 = flow.variable.address @"__iree_flow___sm_node16__model.layer-2.bias" : !iree.ptr<tensor<128xf32>>
     %2 = flow.variable.address @"__iree_flow___sm_node21__model.layer-3.kernel" : !iree.ptr<tensor<128x10xf32>>
     %3 = flow.variable.address @"__iree_flow___sm_node22__model.layer-3.bias" : !iree.ptr<tensor<10xf32>>
-    %4 = xla_hlo.constant dense<0xFF800000> : tensor<f32>
-    %5 = xla_hlo.constant dense<0.000000e+00> : tensor<f32>
+    %4 = mhlo.constant dense<0xFF800000> : tensor<f32>
+    %5 = mhlo.constant dense<0.000000e+00> : tensor<f32>
     %6 = flow.variable.load.indirect %3 : !iree.ptr<tensor<10xf32>> -> tensor<10xf32>
     %7 = flow.variable.load.indirect %2 : !iree.ptr<tensor<128x10xf32>> -> tensor<128x10xf32>
     %8 = flow.variable.load.indirect %1 : !iree.ptr<tensor<128xf32>> -> tensor<128xf32>
     %9 = flow.variable.load.indirect %0 : !iree.ptr<tensor<784x128xf32>> -> tensor<784x128xf32>
-    %10 = "xla_hlo.reshape"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
-    %11 = "xla_hlo.dot"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
+    %10 = "mhlo.reshape"(%arg0) : (tensor<1x28x28x1xf32>) -> tensor<1x784xf32>
+    %11 = "mhlo.dot"(%10, %9) : (tensor<1x784xf32>, tensor<784x128xf32>) -> tensor<1x128xf32>
     %12 = xla_chlo.broadcast_add %11, %8 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x128xf32>, tensor<128xf32>) -> tensor<1x128xf32>
     %13 = xla_chlo.broadcast_maximum %12, %5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<1x128xf32>, tensor<f32>) -> tensor<1x128xf32>
-    %14 = "xla_hlo.dot"(%13, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
+    %14 = "mhlo.dot"(%13, %7) : (tensor<1x128xf32>, tensor<128x10xf32>) -> tensor<1x10xf32>
     %15 = xla_chlo.broadcast_add %14, %6 {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<10xf32>) -> tensor<1x10xf32>
-    %16 = "xla_hlo.reduce"(%15, %4) ( {
+    %16 = "mhlo.reduce"(%15, %4) ( {
     ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-      %23 = xla_hlo.maximum %arg1, %arg2 : tensor<f32>
-      "xla_hlo.return"(%23) : (tensor<f32>) -> ()
+      %23 = mhlo.maximum %arg1, %arg2 : tensor<f32>
+      "mhlo.return"(%23) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %17 = "xla_hlo.broadcast_in_dim"(%16) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
-    %18 = xla_hlo.subtract %15, %17 : tensor<1x10xf32>
-    %19 = "xla_hlo.exponential"(%18) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %20 = "xla_hlo.reduce"(%19, %5) ( {
+    %17 = "mhlo.broadcast_in_dim"(%16) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %18 = mhlo.subtract %15, %17 : tensor<1x10xf32>
+    %19 = "mhlo.exponential"(%18) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %20 = "mhlo.reduce"(%19, %5) ( {
     ^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>):  // no predecessors
-      %23 = xla_hlo.add %arg1, %arg2 : tensor<f32>
-      "xla_hlo.return"(%23) : (tensor<f32>) -> ()
+      %23 = mhlo.add %arg1, %arg2 : tensor<f32>
+      "mhlo.return"(%23) : (tensor<f32>) -> ()
     }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
-    %21 = "xla_hlo.broadcast_in_dim"(%20) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
-    %22 = xla_hlo.divide %19, %21 : tensor<1x10xf32>
+    %21 = "mhlo.broadcast_in_dim"(%20) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %22 = mhlo.divide %19, %21 : tensor<1x10xf32>
     return %22 : tensor<1x10xf32>
   }
 }

diff --git a/iree/test/e2e/models/unidirectional_lstm.mlir b/iree/test/e2e/models/unidirectional_lstm.mlir
index 8e7b4e9..cfa4fb1 100644
--- a/iree/test/e2e/models/unidirectional_lstm.mlir
+++ b/iree/test/e2e/models/unidirectional_lstm.mlir

@@ -10,240 +10,240 @@
 // some calls from @main and the call graphs of the removed callees.
 
 func @Min_reduction.47(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> attributes { sym_visibility = "private" } {
-  %0 = xla_hlo.minimum %arg0, %arg1 : tensor<f32>
+  %0 = mhlo.minimum %arg0, %arg1 : tensor<f32>
   return %0 : tensor<f32>
 }
 func @Max_reduction.51(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> attributes { sym_visibility = "private" } {
-  %0 = xla_hlo.maximum %arg0, %arg1 : tensor<i32>
+  %0 = mhlo.maximum %arg0, %arg1 : tensor<i32>
   return %0 : tensor<i32>
 }
 func @Max_1_reduction.55(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> attributes { sym_visibility = "private" } {
-  %0 = xla_hlo.maximum %arg0, %arg1 : tensor<i32>
+  %0 = mhlo.maximum %arg0, %arg1 : tensor<i32>
   return %0 : tensor<i32>
 }
 func @ForwardLoopCond_gFAnjWGSoLs__.167(%arg0: tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tuple<tensor<i1>> attributes { sym_visibility = "private" } {
-  %0 = "xla_hlo.get_tuple_element"(%arg0) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-  %1 = "xla_hlo.get_tuple_element"(%arg0) {index = 1 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-  %2 = "xla_hlo.compare"(%0, %1) {comparison_direction = "LT"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
-  %3 = "xla_hlo.tuple"(%2) : (tensor<i1>) -> tuple<tensor<i1>>
+  %0 = "mhlo.get_tuple_element"(%arg0) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+  %1 = "mhlo.get_tuple_element"(%arg0) {index = 1 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+  %2 = "mhlo.compare"(%0, %1) {comparison_direction = "LT"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
+  %3 = "mhlo.tuple"(%2) : (tensor<i1>) -> tuple<tensor<i1>>
   return %3 : tuple<tensor<i1>>
 }
 func @Forward_o16DF3vQKaI__disable_call_shape_inference_true_.189(%arg0: tensor<1x10xf32>, %arg1: tensor<1x10xf32>, %arg2: tensor<5x1x64xf32>, %arg3: tensor<5x1x1xf32>, %arg4: tensor<5x1x1xf32>) -> tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>> attributes { sym_visibility = "private" } {
   %cst = constant  dense<5> : tensor<i32>
-  %0 = "xla_hlo.convert"(%arg3) : (tensor<5x1x1xf32>) -> tensor<5x1x1xf32>
+  %0 = "mhlo.convert"(%arg3) : (tensor<5x1x1xf32>) -> tensor<5x1x1xf32>
   %cst_0 = constant dense<0x7F800000> : tensor<f32>
-  %1 = "xla_hlo.convert"(%cst_0) : (tensor<f32>) -> tensor<f32>
-  %2 = "xla_hlo.reduce"(%0, %1) ( {
+  %1 = "mhlo.convert"(%cst_0) : (tensor<f32>) -> tensor<f32>
+  %2 = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg5: tensor<f32>, %arg6: tensor<f32>):
-    %42 = xla_hlo.minimum %arg5, %arg6 : tensor<f32>
-    "xla_hlo.return"(%42) : (tensor<f32>) -> ()
+    %42 = mhlo.minimum %arg5, %arg6 : tensor<f32>
+    "mhlo.return"(%42) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
-  %3 = "xla_hlo.convert"(%2) : (tensor<5xf32>) -> tensor<5xf32>
+  %3 = "mhlo.convert"(%2) : (tensor<5xf32>) -> tensor<5xf32>
   %cst_1 = constant  dense<0.000000e+00> : tensor<f32>
-  %4 = "xla_hlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
-  %5 = "xla_hlo.compare"(%3, %4) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
-  %6 = "xla_hlo.convert"(%5) : (tensor<5xi1>) -> tensor<5xi32>
+  %4 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
+  %5 = "mhlo.compare"(%3, %4) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
+  %6 = "mhlo.convert"(%5) : (tensor<5xi1>) -> tensor<5xi32>
   %cst_2 = constant  dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
-  %7 = xla_hlo.multiply %6, %cst_2 : tensor<5xi32>
-  %8 = "xla_hlo.convert"(%7) : (tensor<5xi32>) -> tensor<5xi32>
+  %7 = mhlo.multiply %6, %cst_2 : tensor<5xi32>
+  %8 = "mhlo.convert"(%7) : (tensor<5xi32>) -> tensor<5xi32>
   %cst_3 = constant dense<-2147483648> : tensor<i32>
-  %9 = "xla_hlo.convert"(%cst_3) : (tensor<i32>) -> tensor<i32>
-  %10 = "xla_hlo.reduce"(%8, %9) ( {
+  %9 = "mhlo.convert"(%cst_3) : (tensor<i32>) -> tensor<i32>
+  %10 = "mhlo.reduce"(%8, %9) ( {
   ^bb0(%arg5: tensor<i32>, %arg6: tensor<i32>):
-    %42 = xla_hlo.maximum %arg5, %arg6 : tensor<i32>
-    "xla_hlo.return"(%42) : (tensor<i32>) -> ()
+    %42 = mhlo.maximum %arg5, %arg6 : tensor<i32>
+    "mhlo.return"(%42) : (tensor<i32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xi32>, tensor<i32>) -> tensor<i32>
-  %11 = "xla_hlo.convert"(%10) : (tensor<i32>) -> tensor<i32>
-  %12 = xla_hlo.subtract %cst, %11 : tensor<i32>
+  %11 = "mhlo.convert"(%10) : (tensor<i32>) -> tensor<i32>
+  %12 = mhlo.subtract %cst, %11 : tensor<i32>
   %cst_4 = constant dense<5> : tensor<i32>
-  %13 = "xla_hlo.compare"(%12, %cst_4) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+  %13 = "mhlo.compare"(%12, %cst_4) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
   %cst_5 = constant dense<0> : tensor<i32>
   %cst_6 = constant dense<5> : tensor<i32>
-  %14 = "xla_hlo.reverse"(%3) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
+  %14 = "mhlo.reverse"(%3) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xf32>) -> tensor<5xf32>
   %cst_7 = constant dense<0.000000e+00> : tensor<f32>
-  %15 = "xla_hlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
-  %16 = "xla_hlo.compare"(%14, %15) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
-  %17 = "xla_hlo.convert"(%16) : (tensor<5xi1>) -> tensor<5xi32>
+  %15 = "mhlo.broadcast_in_dim"(%cst_7) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5xf32>
+  %16 = "mhlo.compare"(%14, %15) {comparison_direction = "EQ"} : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xi1>
+  %17 = "mhlo.convert"(%16) : (tensor<5xi1>) -> tensor<5xi32>
   %cst_8 = constant  dense<[1, 2, 3, 4, 5]> : tensor<5xi32>
-  %18 = xla_hlo.multiply %17, %cst_8 : tensor<5xi32>
-  %19 = "xla_hlo.convert"(%18) : (tensor<5xi32>) -> tensor<5xi32>
+  %18 = mhlo.multiply %17, %cst_8 : tensor<5xi32>
+  %19 = "mhlo.convert"(%18) : (tensor<5xi32>) -> tensor<5xi32>
   %cst_9 = constant dense<-2147483648> : tensor<i32>
-  %20 = "xla_hlo.convert"(%cst_9) : (tensor<i32>) -> tensor<i32>
-  %21 = "xla_hlo.reduce"(%19, %20) ( {
+  %20 = "mhlo.convert"(%cst_9) : (tensor<i32>) -> tensor<i32>
+  %21 = "mhlo.reduce"(%19, %20) ( {
   ^bb0(%arg5: tensor<i32>, %arg6: tensor<i32>):
-    %42 = xla_hlo.maximum %arg5, %arg6 : tensor<i32>
-    "xla_hlo.return"(%42) : (tensor<i32>) -> ()
+    %42 = mhlo.maximum %arg5, %arg6 : tensor<i32>
+    "mhlo.return"(%42) : (tensor<i32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<5xi32>, tensor<i32>) -> tensor<i32>
-  %22 = "xla_hlo.convert"(%21) : (tensor<i32>) -> tensor<i32>
-  %23 = xla_hlo.subtract %cst_6, %22 : tensor<i32>
-  %24 = "xla_hlo.select"(%13, %cst_5, %23) : (tensor<i1>, tensor<i32>, tensor<i32>) -> tensor<i32>
-  %25 = "xla_hlo.convert"(%24) : (tensor<i32>) -> tensor<i64>
+  %22 = "mhlo.convert"(%21) : (tensor<i32>) -> tensor<i32>
+  %23 = mhlo.subtract %cst_6, %22 : tensor<i32>
+  %24 = "mhlo.select"(%13, %cst_5, %23) : (tensor<i1>, tensor<i32>, tensor<i32>) -> tensor<i32>
+  %25 = "mhlo.convert"(%24) : (tensor<i32>) -> tensor<i64>
   %cst_10 = constant dense<5> : tensor<i32>
-  %26 = xla_hlo.subtract %cst_10, %12 : tensor<i32>
-  %27 = "xla_hlo.convert"(%26) : (tensor<i32>) -> tensor<i64>
+  %26 = mhlo.subtract %cst_10, %12 : tensor<i32>
+  %27 = "mhlo.convert"(%26) : (tensor<i32>) -> tensor<i64>
   %cst_11 = constant dense<0.000000e+00> : tensor<f32>
-  %28 = "xla_hlo.broadcast_in_dim"(%cst_11) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<40xf32>
+  %28 = "mhlo.broadcast_in_dim"(%cst_11) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<40xf32>
   %cst_12 = constant  dense<0> : tensor<i64>
   %cst_13 = constant  dense<0.42> : tensor<74x40xf32>
   %cst_14 = constant  dense<0> : tensor<i64>
   %cst_15 = constant  dense<0> : tensor<i64>
-  %29 = "xla_hlo.broadcast_in_dim"(%cst_15) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<i64>) -> tensor<5xi64>
+  %29 = "mhlo.broadcast_in_dim"(%cst_15) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<i64>) -> tensor<5xi64>
   %cst_16 = constant dense<0.000000e+00> : tensor<f32>
-  %30 = "xla_hlo.broadcast_in_dim"(%cst_16) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x10xf32>
+  %30 = "mhlo.broadcast_in_dim"(%cst_16) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x10xf32>
   %cst_17 = constant dense<0.000000e+00> : tensor<f32>
-  %31 = "xla_hlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x10xf32>
-  %32 = "xla_hlo.tuple"(%25, %27, %28, %cst_12, %cst_13, %cst_14, %arg0, %arg1, %arg2, %arg3, %arg4, %29, %30, %31) : (tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>) -> tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>
-  %33 = "xla_hlo.while"(%32) ( {
+  %31 = "mhlo.broadcast_in_dim"(%cst_17) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x10xf32>
+  %32 = "mhlo.tuple"(%25, %27, %28, %cst_12, %cst_13, %cst_14, %arg0, %arg1, %arg2, %arg3, %arg4, %29, %30, %31) : (tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>) -> tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>
+  %33 = "mhlo.while"(%32) ( {
   ^bb0(%arg5: tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>):
     %42 = call @ForwardLoopCond_gFAnjWGSoLs__.167(%arg5) : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tuple<tensor<i1>>
-    %43 = "xla_hlo.get_tuple_element"(%42) {index = 0 : i32} : (tuple<tensor<i1>>) -> tensor<i1>
-    "xla_hlo.return"(%43) : (tensor<i1>) -> ()
+    %43 = "mhlo.get_tuple_element"(%42) {index = 0 : i32} : (tuple<tensor<i1>>) -> tensor<i1>
+    "mhlo.return"(%43) : (tensor<i1>) -> ()
   },  {
   ^bb0(%arg5: tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>):
-    %42 = "xla_hlo.get_tuple_element"(%arg5) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+    %42 = "mhlo.get_tuple_element"(%arg5) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
     %cst_18 = constant dense<1> : tensor<i64>
-    %43 = xla_hlo.add %42, %cst_18 : tensor<i64>
-    %44 = "xla_hlo.get_tuple_element"(%arg5) {index = 1 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-    %45 = "xla_hlo.get_tuple_element"(%arg5) {index = 2 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<40xf32>
-    %46 = "xla_hlo.get_tuple_element"(%arg5) {index = 3 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-    %47 = "xla_hlo.get_tuple_element"(%arg5) {index = 4 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<74x40xf32>
-    %48 = "xla_hlo.get_tuple_element"(%arg5) {index = 5 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-    %49 = "xla_hlo.get_tuple_element"(%arg5) {index = 9 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x1xf32>
-    %50 = "xla_hlo.gather"(%49, %42) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 0 : i64, offset_dims = dense<[0, 1]> : tensor<2xi64>, start_index_map = dense<0> : tensor<1xi64>}, slice_sizes = dense<1> : tensor<3xi64>, start_index_map = dense<0> : tensor<1xi64>} : (tensor<5x1x1xf32>, tensor<i64>) -> tensor<1x1xf32>
-    %51 = "xla_hlo.reshape"(%50) : (tensor<1x1xf32>) -> tensor<1xf32>
-    %52 = "xla_hlo.broadcast_in_dim"(%51) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %43 = mhlo.add %42, %cst_18 : tensor<i64>
+    %44 = "mhlo.get_tuple_element"(%arg5) {index = 1 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+    %45 = "mhlo.get_tuple_element"(%arg5) {index = 2 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<40xf32>
+    %46 = "mhlo.get_tuple_element"(%arg5) {index = 3 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+    %47 = "mhlo.get_tuple_element"(%arg5) {index = 4 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<74x40xf32>
+    %48 = "mhlo.get_tuple_element"(%arg5) {index = 5 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+    %49 = "mhlo.get_tuple_element"(%arg5) {index = 9 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x1xf32>
+    %50 = "mhlo.gather"(%49, %42) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 0 : i64, offset_dims = dense<[0, 1]> : tensor<2xi64>, start_index_map = dense<0> : tensor<1xi64>}, slice_sizes = dense<1> : tensor<3xi64>, start_index_map = dense<0> : tensor<1xi64>} : (tensor<5x1x1xf32>, tensor<i64>) -> tensor<1x1xf32>
+    %51 = "mhlo.reshape"(%50) : (tensor<1x1xf32>) -> tensor<1xf32>
+    %52 = "mhlo.broadcast_in_dim"(%51) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
     %cst_19 = constant dense<1.000000e+00> : tensor<f32>
-    %53 = "xla_hlo.broadcast_in_dim"(%cst_19) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %54 = xla_hlo.multiply %52, %53 : tensor<1x10xf32>
+    %53 = "mhlo.broadcast_in_dim"(%cst_19) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %54 = mhlo.multiply %52, %53 : tensor<1x10xf32>
     %cst_20 = constant dense<0.000000e+00> : tensor<f32>
-    %55 = "xla_hlo.broadcast_in_dim"(%cst_20) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %56 = "xla_hlo.compare"(%54, %55) {comparison_direction = "GT"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xi1>
-    %57 = "xla_hlo.get_tuple_element"(%arg5) {index = 6 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
+    %55 = "mhlo.broadcast_in_dim"(%cst_20) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %56 = "mhlo.compare"(%54, %55) {comparison_direction = "GT"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xi1>
+    %57 = "mhlo.get_tuple_element"(%arg5) {index = 6 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
     %cst_21 = constant dense<5.000000e-01> : tensor<f32>
-    %58 = "xla_hlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %59 = "xla_hlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %60 = "xla_hlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %61 = "xla_hlo.get_tuple_element"(%arg5) {index = 8 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x64xf32>
-    %62 = "xla_hlo.gather"(%61, %42) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 0 : i64, offset_dims = dense<[0, 1]> : tensor<2xi64>, start_index_map = dense<0> : tensor<1xi64>}, slice_sizes = dense<[1, 1, 64]> : tensor<3xi64>} : (tensor<5x1x64xf32>, tensor<i64>) -> tensor<1x64xf32>
-    %63 = "xla_hlo.get_tuple_element"(%arg5) {index = 7 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
-    %64 = "xla_hlo.concatenate"(%62, %63) {dimension = 1 : i64} : (tensor<1x64xf32>, tensor<1x10xf32>) -> tensor<1x74xf32>
-    %65 = "xla_hlo.dot"(%64, %47) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x74xf32>, tensor<74x40xf32>) -> tensor<1x40xf32>
-    %66 = "xla_hlo.transpose"(%65) {permutation = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x40xf32>
-    %67 = "xla_hlo.reshape"(%45) : (tensor<40xf32>) -> tensor<1x40xf32>
-    %68 = xla_hlo.add %66, %67 : tensor<1x40xf32>
-    %69 = "xla_hlo.slice"(%68) {limit_indices = dense<[1, 30]> : tensor<2xi64>, start_indices = dense<[0, 20]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
-    %70 = xla_hlo.multiply %60, %69 : tensor<1x10xf32>
-    %71 = "xla_hlo.tanh"(%70) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %72 = xla_hlo.multiply %59, %71 : tensor<1x10xf32>
-    %73 = xla_hlo.add %58, %72 : tensor<1x10xf32>
-    %74 = xla_hlo.multiply %73, %57 : tensor<1x10xf32>
+    %58 = "mhlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %59 = "mhlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %60 = "mhlo.broadcast_in_dim"(%cst_21) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %61 = "mhlo.get_tuple_element"(%arg5) {index = 8 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x64xf32>
+    %62 = "mhlo.gather"(%61, %42) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 0 : i64, offset_dims = dense<[0, 1]> : tensor<2xi64>, start_index_map = dense<0> : tensor<1xi64>}, slice_sizes = dense<[1, 1, 64]> : tensor<3xi64>} : (tensor<5x1x64xf32>, tensor<i64>) -> tensor<1x64xf32>
+    %63 = "mhlo.get_tuple_element"(%arg5) {index = 7 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
+    %64 = "mhlo.concatenate"(%62, %63) {dimension = 1 : i64} : (tensor<1x64xf32>, tensor<1x10xf32>) -> tensor<1x74xf32>
+    %65 = "mhlo.dot"(%64, %47) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x74xf32>, tensor<74x40xf32>) -> tensor<1x40xf32>
+    %66 = "mhlo.transpose"(%65) {permutation = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x40xf32>
+    %67 = "mhlo.reshape"(%45) : (tensor<40xf32>) -> tensor<1x40xf32>
+    %68 = mhlo.add %66, %67 : tensor<1x40xf32>
+    %69 = "mhlo.slice"(%68) {limit_indices = dense<[1, 30]> : tensor<2xi64>, start_indices = dense<[0, 20]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
+    %70 = mhlo.multiply %60, %69 : tensor<1x10xf32>
+    %71 = "mhlo.tanh"(%70) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %72 = mhlo.multiply %59, %71 : tensor<1x10xf32>
+    %73 = mhlo.add %58, %72 : tensor<1x10xf32>
+    %74 = mhlo.multiply %73, %57 : tensor<1x10xf32>
     %cst_22 = constant dense<5.000000e-01> : tensor<f32>
-    %75 = "xla_hlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %76 = "xla_hlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %77 = "xla_hlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %78 = "xla_hlo.slice"(%68) {limit_indices = dense<[1, 20]> : tensor<2xi64>, start_indices = dense<[0, 10]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
-    %79 = xla_hlo.multiply %77, %78 : tensor<1x10xf32>
-    %80 = "xla_hlo.tanh"(%79) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %81 = xla_hlo.multiply %76, %80 : tensor<1x10xf32>
-    %82 = xla_hlo.add %75, %81 : tensor<1x10xf32>
-    %83 = "xla_hlo.slice"(%68) {limit_indices = dense<[1, 10]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
-    %84 = "xla_hlo.tanh"(%83) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %85 = xla_hlo.multiply %82, %84 : tensor<1x10xf32>
-    %86 = xla_hlo.add %74, %85 : tensor<1x10xf32>
+    %75 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %76 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %77 = "mhlo.broadcast_in_dim"(%cst_22) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %78 = "mhlo.slice"(%68) {limit_indices = dense<[1, 20]> : tensor<2xi64>, start_indices = dense<[0, 10]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
+    %79 = mhlo.multiply %77, %78 : tensor<1x10xf32>
+    %80 = "mhlo.tanh"(%79) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %81 = mhlo.multiply %76, %80 : tensor<1x10xf32>
+    %82 = mhlo.add %75, %81 : tensor<1x10xf32>
+    %83 = "mhlo.slice"(%68) {limit_indices = dense<[1, 10]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
+    %84 = "mhlo.tanh"(%83) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %85 = mhlo.multiply %82, %84 : tensor<1x10xf32>
+    %86 = mhlo.add %74, %85 : tensor<1x10xf32>
     %cst_23 = constant dense<1.000000e+01> : tensor<f32>
-    %87 = "xla_hlo.broadcast_in_dim"(%cst_23) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %88 = xla_hlo.minimum %86, %87 : tensor<1x10xf32>
+    %87 = "mhlo.broadcast_in_dim"(%cst_23) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %88 = mhlo.minimum %86, %87 : tensor<1x10xf32>
     %cst_24 = constant dense<-1.000000e+01> : tensor<f32>
-    %89 = "xla_hlo.broadcast_in_dim"(%cst_24) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %90 = xla_hlo.maximum %88, %89 : tensor<1x10xf32>
-    %91 = "xla_hlo.select"(%56, %57, %90) : (tensor<1x10xi1>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
-    %92 = "xla_hlo.reshape"(%50) : (tensor<1x1xf32>) -> tensor<1xf32>
-    %93 = "xla_hlo.broadcast_in_dim"(%92) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
+    %89 = "mhlo.broadcast_in_dim"(%cst_24) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %90 = mhlo.maximum %88, %89 : tensor<1x10xf32>
+    %91 = "mhlo.select"(%56, %57, %90) : (tensor<1x10xi1>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %92 = "mhlo.reshape"(%50) : (tensor<1x1xf32>) -> tensor<1xf32>
+    %93 = "mhlo.broadcast_in_dim"(%92) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor<1xf32>) -> tensor<1x10xf32>
     %cst_25 = constant dense<1.000000e+00> : tensor<f32>
-    %94 = "xla_hlo.broadcast_in_dim"(%cst_25) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %95 = xla_hlo.multiply %93, %94 : tensor<1x10xf32>
+    %94 = "mhlo.broadcast_in_dim"(%cst_25) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %95 = mhlo.multiply %93, %94 : tensor<1x10xf32>
     %cst_26 = constant dense<0.000000e+00> : tensor<f32>
-    %96 = "xla_hlo.broadcast_in_dim"(%cst_26) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %97 = "xla_hlo.compare"(%95, %96) {comparison_direction = "GT"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xi1>
+    %96 = "mhlo.broadcast_in_dim"(%cst_26) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %97 = "mhlo.compare"(%95, %96) {comparison_direction = "GT"} : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xi1>
     %cst_27 = constant dense<5.000000e-01> : tensor<f32>
-    %98 = "xla_hlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %99 = "xla_hlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %100 = "xla_hlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-    %101 = "xla_hlo.slice"(%68) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
-    %102 = xla_hlo.multiply %100, %101 : tensor<1x10xf32>
-    %103 = "xla_hlo.tanh"(%102) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %104 = xla_hlo.multiply %99, %103 : tensor<1x10xf32>
-    %105 = xla_hlo.add %98, %104 : tensor<1x10xf32>
-    %106 = "xla_hlo.tanh"(%90) : (tensor<1x10xf32>) -> tensor<1x10xf32>
-    %107 = xla_hlo.multiply %105, %106 : tensor<1x10xf32>
-    %108 = "xla_hlo.select"(%97, %63, %107) : (tensor<1x10xi1>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
-    %109 = "xla_hlo.get_tuple_element"(%arg5) {index = 10 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x1xf32>
-    %110 = "xla_hlo.get_tuple_element"(%arg5) {index = 11 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5xi64>
-    %111 = "xla_hlo.reshape"(%48) : (tensor<i64>) -> tensor<1xi64>
-    %112 = "xla_hlo.slice"(%111) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi64>) -> tensor<1xi64>
-    %113 = "xla_hlo.reshape"(%42) : (tensor<i64>) -> tensor<1xi64>
-    %114 = "xla_hlo.concatenate"(%113) {dimension = 0 : i64} : (tensor<1xi64>) -> tensor<1xi64>
-    %115 = "xla_hlo.convert"(%114) : (tensor<1xi64>) -> tensor<1xi32>
-    %116 = "xla_hlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
-    %117 = "xla_hlo.reshape"(%116) : (tensor<1xi32>) -> tensor<i32>
-    %118 = "xla_hlo.dynamic-update-slice"(%110, %112, %117) : (tensor<5xi64>, tensor<1xi64>, tensor<i32>) -> tensor<5xi64>
-    %119 = "xla_hlo.get_tuple_element"(%arg5) {index = 12 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
-    %120 = "xla_hlo.reshape"(%91) : (tensor<1x10xf32>) -> tensor<1x1x10xf32>
-    %121 = "xla_hlo.slice"(%120) {limit_indices = dense<[1, 1, 10]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x1x10xf32>) -> tensor<1x1x10xf32>
-    %122 = "xla_hlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
-    %123 = "xla_hlo.reshape"(%122) : (tensor<1xi32>) -> tensor<i32>
+    %98 = "mhlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %99 = "mhlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %100 = "mhlo.broadcast_in_dim"(%cst_27) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+    %101 = "mhlo.slice"(%68) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
+    %102 = mhlo.multiply %100, %101 : tensor<1x10xf32>
+    %103 = "mhlo.tanh"(%102) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %104 = mhlo.multiply %99, %103 : tensor<1x10xf32>
+    %105 = mhlo.add %98, %104 : tensor<1x10xf32>
+    %106 = "mhlo.tanh"(%90) : (tensor<1x10xf32>) -> tensor<1x10xf32>
+    %107 = mhlo.multiply %105, %106 : tensor<1x10xf32>
+    %108 = "mhlo.select"(%97, %63, %107) : (tensor<1x10xi1>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %109 = "mhlo.get_tuple_element"(%arg5) {index = 10 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x1xf32>
+    %110 = "mhlo.get_tuple_element"(%arg5) {index = 11 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5xi64>
+    %111 = "mhlo.reshape"(%48) : (tensor<i64>) -> tensor<1xi64>
+    %112 = "mhlo.slice"(%111) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi64>) -> tensor<1xi64>
+    %113 = "mhlo.reshape"(%42) : (tensor<i64>) -> tensor<1xi64>
+    %114 = "mhlo.concatenate"(%113) {dimension = 0 : i64} : (tensor<1xi64>) -> tensor<1xi64>
+    %115 = "mhlo.convert"(%114) : (tensor<1xi64>) -> tensor<1xi32>
+    %116 = "mhlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
+    %117 = "mhlo.reshape"(%116) : (tensor<1xi32>) -> tensor<i32>
+    %118 = "mhlo.dynamic-update-slice"(%110, %112, %117) : (tensor<5xi64>, tensor<1xi64>, tensor<i32>) -> tensor<5xi64>
+    %119 = "mhlo.get_tuple_element"(%arg5) {index = 12 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
+    %120 = "mhlo.reshape"(%91) : (tensor<1x10xf32>) -> tensor<1x1x10xf32>
+    %121 = "mhlo.slice"(%120) {limit_indices = dense<[1, 1, 10]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x1x10xf32>) -> tensor<1x1x10xf32>
+    %122 = "mhlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
+    %123 = "mhlo.reshape"(%122) : (tensor<1xi32>) -> tensor<i32>
     %cst_28 = constant dense<0> : tensor<i32>
-    %124 = "xla_hlo.dynamic-update-slice"(%119, %121, %123, %cst_28, %cst_28) : (tensor<5x1x10xf32>, tensor<1x1x10xf32>, tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<5x1x10xf32>
-    %125 = "xla_hlo.get_tuple_element"(%arg5) {index = 13 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
-    %126 = "xla_hlo.reshape"(%108) : (tensor<1x10xf32>) -> tensor<1x1x10xf32>
-    %127 = "xla_hlo.slice"(%126) {limit_indices = dense<[1, 1, 10]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x1x10xf32>) -> tensor<1x1x10xf32>
-    %128 = "xla_hlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
-    %129 = "xla_hlo.reshape"(%128) : (tensor<1xi32>) -> tensor<i32>
+    %124 = "mhlo.dynamic-update-slice"(%119, %121, %123, %cst_28, %cst_28) : (tensor<5x1x10xf32>, tensor<1x1x10xf32>, tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<5x1x10xf32>
+    %125 = "mhlo.get_tuple_element"(%arg5) {index = 13 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
+    %126 = "mhlo.reshape"(%108) : (tensor<1x10xf32>) -> tensor<1x1x10xf32>
+    %127 = "mhlo.slice"(%126) {limit_indices = dense<[1, 1, 10]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<1x1x10xf32>) -> tensor<1x1x10xf32>
+    %128 = "mhlo.slice"(%115) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<1xi32>) -> tensor<1xi32>
+    %129 = "mhlo.reshape"(%128) : (tensor<1xi32>) -> tensor<i32>
     %cst_29 = constant dense<0> : tensor<i32>
-    %130 = "xla_hlo.dynamic-update-slice"(%125, %127, %129, %cst_29, %cst_29) : (tensor<5x1x10xf32>, tensor<1x1x10xf32>, tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<5x1x10xf32>
-    %131 = "xla_hlo.tuple"(%43, %44, %45, %46, %47, %48, %91, %108, %61, %49, %109, %118, %124, %130) : (tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>) -> tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>
-    "xla_hlo.return"(%131) : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> ()
+    %130 = "mhlo.dynamic-update-slice"(%125, %127, %129, %cst_29, %cst_29) : (tensor<5x1x10xf32>, tensor<1x1x10xf32>, tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<5x1x10xf32>
+    %131 = "mhlo.tuple"(%43, %44, %45, %46, %47, %48, %91, %108, %61, %49, %109, %118, %124, %130) : (tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>) -> tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>
+    "mhlo.return"(%131) : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> ()
   }) : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>
-  %34 = "xla_hlo.get_tuple_element"(%33) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-  %35 = "xla_hlo.get_tuple_element"(%33) {index = 11 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5xi64>
-  %36 = "xla_hlo.get_tuple_element"(%33) {index = 12 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
-  %37 = "xla_hlo.get_tuple_element"(%33) {index = 13 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
-  %38 = "xla_hlo.get_tuple_element"(%33) {index = 5 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
-  %39 = "xla_hlo.get_tuple_element"(%33) {index = 6 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
-  %40 = "xla_hlo.get_tuple_element"(%33) {index = 7 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
-  %41 = "xla_hlo.tuple"(%34, %35, %36, %37, %38, %39, %40) : (tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>) -> tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>
+  %34 = "mhlo.get_tuple_element"(%33) {index = 0 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+  %35 = "mhlo.get_tuple_element"(%33) {index = 11 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5xi64>
+  %36 = "mhlo.get_tuple_element"(%33) {index = 12 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
+  %37 = "mhlo.get_tuple_element"(%33) {index = 13 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<5x1x10xf32>
+  %38 = "mhlo.get_tuple_element"(%33) {index = 5 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<i64>
+  %39 = "mhlo.get_tuple_element"(%33) {index = 6 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
+  %40 = "mhlo.get_tuple_element"(%33) {index = 7 : i32} : (tuple<tensor<i64>, tensor<i64>, tensor<40xf32>, tensor<i64>, tensor<74x40xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>>) -> tensor<1x10xf32>
+  %41 = "mhlo.tuple"(%34, %35, %36, %37, %38, %39, %40) : (tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>) -> tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>
   return %41 : tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>
 }
 
 // CHECK-LABEL: EXEC @main
 func @main(%arg0: tensor<1x5xf32>, %arg1: tensor<1x5x2x2xf32>) -> tuple<tensor<5x1x10xf32>> attributes { iree.module.export } {
   %cst = constant dense<0.000000e+00> : tensor<f32>
-  %0 = "xla_hlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %0 = "mhlo.broadcast_in_dim"(%cst) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
   %cst_0 = constant dense<0.000000e+00> : tensor<f32>
-  %1 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %1 = "mhlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
   %cst_1 = constant dense<0.000000e+00> : tensor<f32>
-  %2 = "xla_hlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %2 = "mhlo.broadcast_in_dim"(%cst_1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
   %cst_2 = constant dense<0.000000e+00> : tensor<f32>
-  %3 = "xla_hlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %3 = "mhlo.broadcast_in_dim"(%cst_2) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
   %cst_3 = constant dense<0.000000e+00> : tensor<f32>
-  %4 = "xla_hlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %4 = "mhlo.broadcast_in_dim"(%cst_3) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
   %cst_4 = constant dense<0.000000e+00> : tensor<f32>
-  %5 = "xla_hlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
-  %6 = "xla_hlo.reshape"(%arg1) : (tensor<1x5x2x2xf32>) -> tensor<1x5x2x2xf32>
-  %7 = "xla_hlo.reshape"(%6) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
+  %5 = "mhlo.broadcast_in_dim"(%cst_4) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
+  %6 = "mhlo.reshape"(%arg1) : (tensor<1x5x2x2xf32>) -> tensor<1x5x2x2xf32>
+  %7 = "mhlo.reshape"(%6) : (tensor<1x5x2x2xf32>) -> tensor<1x5x4xf32>
   %cst_5 = constant dense<0.000000e+00> : tensor<f32>
-  %8 = "xla_hlo.pad"(%7, %cst_5) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
-  %9 = "xla_hlo.transpose"(%8) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
-  %10 = "xla_hlo.reshape"(%arg0) : (tensor<1x5xf32>) -> tensor<1x5xf32>
-  %11 = "xla_hlo.transpose"(%10) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
-  %12 = "xla_hlo.reshape"(%11) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
+  %8 = "mhlo.pad"(%7, %cst_5) {edge_padding_high = dense<[0, 0, 60]> : tensor<3xi64>, edge_padding_low = dense<0> : tensor<3xi64>, interior_padding = dense<0> : tensor<3xi64>} : (tensor<1x5x4xf32>, tensor<f32>) -> tensor<1x5x64xf32>
+  %9 = "mhlo.transpose"(%8) {permutation = dense<[1, 0, 2]> : tensor<3xi64>} : (tensor<1x5x64xf32>) -> tensor<5x1x64xf32>
+  %10 = "mhlo.reshape"(%arg0) : (tensor<1x5xf32>) -> tensor<1x5xf32>
+  %11 = "mhlo.transpose"(%10) {permutation = dense<[1, 0]> : tensor<2xi64>} : (tensor<1x5xf32>) -> tensor<5x1xf32>
+  %12 = "mhlo.reshape"(%11) : (tensor<5x1xf32>) -> tensor<5x1x1xf32>
   %cst_6 = constant dense<0.000000e+00> : tensor<f32>
-  %13 = "xla_hlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x1xf32>
+  %13 = "mhlo.broadcast_in_dim"(%cst_6) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<5x1x1xf32>
   %14 = call @Forward_o16DF3vQKaI__disable_call_shape_inference_true_.189(%4, %5, %9, %12, %13) : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<5x1x64xf32>, tensor<5x1x1xf32>, tensor<5x1x1xf32>) -> tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>
-  %21 = "xla_hlo.get_tuple_element"(%14) {index = 3 : i32} : (tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>) -> tensor<5x1x10xf32>
-  %22 = "xla_hlo.copy"(%21) : (tensor<5x1x10xf32>) -> tensor<5x1x10xf32>
-  %23 = "xla_hlo.reshape"(%22) : (tensor<5x1x10xf32>) -> tensor<5x1x10xf32>
-  %24 = "xla_hlo.tuple"(%23) : (tensor<5x1x10xf32>) -> tuple<tensor<5x1x10xf32>>
+  %21 = "mhlo.get_tuple_element"(%14) {index = 3 : i32} : (tuple<tensor<i64>, tensor<5xi64>, tensor<5x1x10xf32>, tensor<5x1x10xf32>, tensor<i64>, tensor<1x10xf32>, tensor<1x10xf32>>) -> tensor<5x1x10xf32>
+  %22 = "mhlo.copy"(%21) : (tensor<5x1x10xf32>) -> tensor<5x1x10xf32>
+  %23 = "mhlo.reshape"(%22) : (tensor<5x1x10xf32>) -> tensor<5x1x10xf32>
+  %24 = "mhlo.tuple"(%23) : (tensor<5x1x10xf32>) -> tuple<tensor<5x1x10xf32>>
   return %24 : tuple<tensor<5x1x10xf32>>
 }
 

diff --git a/iree/test/e2e/regression/dynamic_abs.mlir b/iree/test/e2e/regression/dynamic_abs.mlir
index bcff369..8af3dac 100644
--- a/iree/test/e2e/regression/dynamic_abs.mlir
+++ b/iree/test/e2e/regression/dynamic_abs.mlir

@@ -4,7 +4,7 @@
 // CHECK-LABEL: EXEC @dynamic_tensor
 func @dynamic_tensor() -> tensor<?x?xf32> attributes { iree.module.export } {
   %input = iree.dynamic_shape_constant dense<[[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]]> : tensor<2x3xf32> -> tensor<?x?xf32>
-  %res = "xla_hlo.abs"(%input) : (tensor<?x?xf32>) -> tensor<?x?xf32>
+  %res = "mhlo.abs"(%input) : (tensor<?x?xf32>) -> tensor<?x?xf32>
   return %res : tensor<?x?xf32>
 }
 

diff --git a/iree/test/e2e/regression/dynamic_add.mlir b/iree/test/e2e/regression/dynamic_add.mlir
index 243b79a..0c97386 100644
--- a/iree/test/e2e/regression/dynamic_add.mlir
+++ b/iree/test/e2e/regression/dynamic_add.mlir

@@ -5,6 +5,6 @@
 // CHECK: 2x4xf32=[6 8 10 12][-6 -8 -10 -12]
 
 func @main(%arg0: tensor<?x4xf32>, %arg1: tensor<?x4xf32>) -> tensor<?x4xf32> attributes {iree.module.export} {
-  %0 = "xla_hlo.add"(%arg0, %arg1) : (tensor<?x4xf32>, tensor<?x4xf32>) -> tensor<?x4xf32>
+  %0 = "mhlo.add"(%arg0, %arg1) : (tensor<?x4xf32>, tensor<?x4xf32>) -> tensor<?x4xf32>
   return %0: tensor<?x4xf32>
 }

diff --git a/iree/test/e2e/regression/dynamic_compare_and_select.mlir b/iree/test/e2e/regression/dynamic_compare_and_select.mlir
index fa7c74d..253ad6c 100644
--- a/iree/test/e2e/regression/dynamic_compare_and_select.mlir
+++ b/iree/test/e2e/regression/dynamic_compare_and_select.mlir

@@ -4,8 +4,8 @@
 // CHECK: 10xi32=9 8 7 6 5 4 3 2 1 0
 
 func @main(%arg0: tensor<?xi32>, %arg1: tensor<?xi32>, %arg2: tensor<?xi32>, %arg3: tensor<?xi32>) -> tensor<?xi32> attributes {iree.module.export} {
-    %1 = "xla_hlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<?xi32>, tensor<?xi32>) -> tensor<?xi1>
-    %2 = "xla_hlo.select"(%1, %arg2, %arg3) : (tensor<?xi1>, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
+    %1 = "mhlo.compare"(%arg0, %arg1) {comparison_direction = "LT"} : (tensor<?xi32>, tensor<?xi32>) -> tensor<?xi1>
+    %2 = "mhlo.select"(%1, %arg2, %arg3) : (tensor<?xi1>, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>
     return %2 : tensor<?xi32>
 }
 

diff --git a/iree/test/e2e/regression/dynamic_dot_general.mlir b/iree/test/e2e/regression/dynamic_dot_general.mlir
index 488136f..e439c4d 100644
--- a/iree/test/e2e/regression/dynamic_dot_general.mlir
+++ b/iree/test/e2e/regression/dynamic_dot_general.mlir

@@ -7,7 +7,7 @@
 func @basic_dot(
   %lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>,
   %unused0: tensor<?x?x?xf32>, %unused1: tensor<?x?x?xf32>) -> tensor<?x?xf32> {
-  %0 = "xla_hlo.dot_general"(%lhs, %rhs) {dot_dimension_numbers={
+  %0 = "mhlo.dot_general"(%lhs, %rhs) {dot_dimension_numbers={
     lhs_batching_dimensions = dense<[]> : tensor<0xi64>,
     lhs_contracting_dimensions = dense<1> : tensor<1xi64>,
     rhs_batching_dimensions = dense<[]> : tensor<0xi64>,
@@ -22,7 +22,7 @@
 func @batch_dimension(
   %unused0: tensor<?x?xf32>, %unused1: tensor<?x?xf32>,
   %lhs: tensor<?x?x?xf32>, %rhs: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
-  %0 = "xla_hlo.dot_general"(%lhs, %rhs) {dot_dimension_numbers={
+  %0 = "mhlo.dot_general"(%lhs, %rhs) {dot_dimension_numbers={
     lhs_batching_dimensions = dense<[0]> : tensor<1xi64>,
     lhs_contracting_dimensions = dense<[2]> : tensor<1xi64>,
     rhs_batching_dimensions = dense<[0]> : tensor<1xi64>,

diff --git a/iree/test/e2e/regression/globals.mlir b/iree/test/e2e/regression/globals.mlir
index 4f43f6d..e44d0e2 100644
--- a/iree/test/e2e/regression/globals.mlir
+++ b/iree/test/e2e/regression/globals.mlir

@@ -15,7 +15,7 @@
   func @inc() -> tensor<f32> {
     %0 = flow.variable.load @counter : tensor<f32>
     %c1 = constant dense<1.0> : tensor<f32>
-    %1 = xla_hlo.add %0, %c1 : tensor<f32>
+    %1 = mhlo.add %0, %c1 : tensor<f32>
     flow.variable.store %1, @counter : tensor<f32>
     %2 = flow.variable.load @counter : tensor<f32>
     return %2 : tensor<f32>

diff --git a/iree/test/e2e/vulkan_specific/convolution1.mlir b/iree/test/e2e/vulkan_specific/convolution1.mlir
index 807e517..d0fc606 100644
--- a/iree/test/e2e/vulkan_specific/convolution1.mlir
+++ b/iree/test/e2e/vulkan_specific/convolution1.mlir

@@ -8,7 +8,7 @@
       [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
       [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
       [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
         batch_group_count = 1 : i64,
         dimension_numbers = {
           input_batch_dimension = 0 : i64,
@@ -40,7 +40,7 @@
       [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
       [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
       [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
        batch_group_count = 1 : i64,
        dimension_numbers = {
          input_batch_dimension = 0 : i64,

diff --git a/iree/test/e2e/vulkan_specific/convolution2.mlir b/iree/test/e2e/vulkan_specific/convolution2.mlir
index 63ece8d..ce88d5d 100644
--- a/iree/test/e2e/vulkan_specific/convolution2.mlir
+++ b/iree/test/e2e/vulkan_specific/convolution2.mlir

@@ -11,7 +11,7 @@
   %weights = iree.unfoldable_constant dense<[
       [[[1.0]], [[2.0]], [[3.0]]],
       [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
        batch_group_count = 1 : i64,
        dimension_numbers = {
          input_batch_dimension = 0 : i64,
@@ -101,7 +101,7 @@
        [[ 91.0,  92.0,  93.0,  94.0,  95.0,  96.0],
         [ 97.0,  98.0,  99.0, 100.0, 101.0, 102.0],
         [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
        batch_group_count = 1 : i64,
        dimension_numbers = {
          input_batch_dimension = 0 : i64,

diff --git a/iree/test/e2e/vulkan_specific/pw_add_multiwg.mlir b/iree/test/e2e/vulkan_specific/pw_add_multiwg.mlir
index 0351fd8..df6d95c 100644
--- a/iree/test/e2e/vulkan_specific/pw_add_multiwg.mlir
+++ b/iree/test/e2e/vulkan_specific/pw_add_multiwg.mlir

@@ -7,7 +7,7 @@
                                        [18, 20, 22, 24, 26, 28, 30, 32],
                                        [34, 36, 38, 40, 42, 44, 46, 48],
                                        [50, 52, 54, 56, 58, 60, 62, 64]]> : tensor<4x8xi32>
-  %2 = "xla_hlo.add"(%0, %1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
+  %2 = "mhlo.add"(%0, %1) : (tensor<4x8xi32>, tensor<4x8xi32>) -> tensor<4x8xi32>
   check.expect_eq_const(%2, dense<[[3, 6, 9, 12, 15, 18, 21, 24],
                                    [27, 30, 33, 36, 39, 42, 45, 48],
                                    [51, 54, 57, 60, 63, 66, 69, 72],

diff --git a/iree/test/e2e/vulkan_specific/reduce.mlir b/iree/test/e2e/vulkan_specific/reduce.mlir
index 8582503..bbfd105 100644
--- a/iree/test/e2e/vulkan_specific/reduce.mlir
+++ b/iree/test/e2e/vulkan_specific/reduce.mlir

@@ -1,10 +1,10 @@
 func @reduce_dim_1() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]> : tensor<2x5xi32>
   %1 = iree.unfoldable_constant dense<10> : tensor<i32>
-  %2 = "xla_hlo.reduce"(%0, %1) ( {
+  %2 = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0 : tensor<i32>, %arg1 : tensor<i32>):
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x5xi32>, tensor<i32>) -> tensor<2xi32>
   check.expect_eq_const(%2, dense<[25, 50]> : tensor<2xi32>) : tensor<2xi32>
   return
@@ -15,10 +15,10 @@
 func @reduce_dim_1_const() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]> : tensor<2x5xi32>
   %1 = constant dense<10> : tensor<i32>
-  %2 = "xla_hlo.reduce"(%0, %1) ( {
+  %2 = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0 : tensor<i32>, %arg1 : tensor<i32>):
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x5xi32>, tensor<i32>) -> tensor<2xi32>
   check.expect_eq_const(%2, dense<[25, 50]> : tensor<2xi32>) : tensor<2xi32>
   return
@@ -27,10 +27,10 @@
 func @reduce_dim_0() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]> : tensor<1x10xi32>
   %1 = iree.unfoldable_constant dense<10> : tensor<i32>
-  %2 = "xla_hlo.reduce"(%0, %1) ( {
+  %2 = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0 : tensor<i32>, %arg1 : tensor<i32>):
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xi32>, tensor<i32>) -> tensor<1xi32>
   check.expect_eq_const(%2, dense<[65]> : tensor<1xi32>) : tensor<1xi32>
   return
@@ -39,10 +39,10 @@
 func @reduce_to_scalar() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : tensor<10xi32>
   %1 = iree.unfoldable_constant dense<10> : tensor<i32>
-  %2 = "xla_hlo.reduce"(%0, %1) ( {
+  %2 = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0 : tensor<i32>, %arg1 : tensor<i32>):
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<10xi32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%2, dense<65> : tensor<i32>) : tensor<i32>
   return

diff --git a/iree/test/e2e/xla_ops/BUILD b/iree/test/e2e/xla_ops/BUILD
index 31967b0..744d0a4 100644
--- a/iree/test/e2e/xla_ops/BUILD
+++ b/iree/test/e2e/xla_ops/BUILD

@@ -57,7 +57,7 @@
         # operation. Lowering from XLA -> linalg should be easy fix.
         # "floor.mlir",
 
-        # TODO(#1694): Enable after xla_hlo.gather can be lowered to linalg.
+        # TODO(#1694): Enable after mhlo.gather can be lowered to linalg.
         # "gather.mlir",
         # "gather_concat.mlir",
         #

diff --git a/iree/test/e2e/xla_ops/abs.mlir b/iree/test/e2e/xla_ops/abs.mlir
index 61afd90..25dd318 100644
--- a/iree/test/e2e/xla_ops/abs.mlir
+++ b/iree/test/e2e/xla_ops/abs.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-1.0, -2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.abs"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.abs"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<-4.0> : tensor<f32>
-  %result = "xla_hlo.abs"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.abs"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<4.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/add.mlir b/iree/test/e2e/xla_ops/add.mlir
index aa7eda1..6a57bb6 100644
--- a/iree/test/e2e/xla_ops/add.mlir
+++ b/iree/test/e2e/xla_ops/add.mlir

@@ -1,7 +1,7 @@
 func @tensor() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
   %1 = iree.unfoldable_constant dense<[5.0, 6.0, 7.0, 8.0]> : tensor<4xf32>
-  %result = "xla_hlo.add"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.add"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[6.0, 8.0, 10.0, 12.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/batch_norm_inference.mlir b/iree/test/e2e/xla_ops/batch_norm_inference.mlir
index fbfe3c7..81e2453 100644
--- a/iree/test/e2e/xla_ops/batch_norm_inference.mlir
+++ b/iree/test/e2e/xla_ops/batch_norm_inference.mlir

@@ -4,7 +4,7 @@
   %var = iree.unfoldable_constant dense<[2.0, 2.0, 2.0, 2.0]> : tensor<4xf32>
   %offset = iree.unfoldable_constant dense<[1.0, 1.0, 1.0, 1.0]> : tensor<4xf32>
   %scale = iree.unfoldable_constant dense<[1.0, 1.0, 1.0, 1.0]> : tensor<4xf32>
-  %result = "xla_hlo.batch_norm_inference"(%x, %mean, %var, %offset, %scale) {epsilon = 1.000000e-03 : f32, feature_index = 1 : i64} : (tensor<2x4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<2x4xf32>
+  %result = "mhlo.batch_norm_inference"(%x, %mean, %var, %offset, %scale) {epsilon = 1.000000e-03 : f32, feature_index = 1 : i64} : (tensor<2x4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<2x4xf32>
   // TODO(b/146898896): This should probably be a fuzzier check with round values.
   check.expect_almost_eq_const(%result, dense<[
       [2.0, 2.9995, 3.999, 4.9985],

diff --git a/iree/test/e2e/xla_ops/broadcast.mlir b/iree/test/e2e/xla_ops/broadcast.mlir
index de52134..8817290 100644
--- a/iree/test/e2e/xla_ops/broadcast.mlir
+++ b/iree/test/e2e/xla_ops/broadcast.mlir

@@ -1,7 +1,7 @@
 func @broadcast_2D_3D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3, 4],
                                            [5, 6, 7, 8]]> : tensor<2x4xi32>
-  %result = "xla_hlo.broadcast"(%input) {broadcast_sizes = dense<3> : tensor<1xi64>} : (tensor<2x4xi32>) -> tensor<3x2x4xi32>
+  %result = "mhlo.broadcast"(%input) {broadcast_sizes = dense<3> : tensor<1xi64>} : (tensor<2x4xi32>) -> tensor<3x2x4xi32>
   check.expect_eq_const(%result, dense<[
       [[1, 2, 3, 4], [5, 6, 7, 8]],
       [[1, 2, 3, 4], [5, 6, 7, 8]],
@@ -11,7 +11,7 @@
 
 func @broadcast_3D_scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<42> : tensor<i32>
-  %result = "xla_hlo.broadcast"(%input) {broadcast_sizes = dense<[3, 2, 4]> : tensor<3xi64>} : (tensor<i32>) -> tensor<3x2x4xi32>
+  %result = "mhlo.broadcast"(%input) {broadcast_sizes = dense<[3, 2, 4]> : tensor<3xi64>} : (tensor<i32>) -> tensor<3x2x4xi32>
   check.expect_eq_const(%result, dense<[
       [[42, 42, 42, 42], [42, 42, 42, 42]],
       [[42, 42, 42, 42], [42, 42, 42, 42]],

diff --git a/iree/test/e2e/xla_ops/broadcast_in_dim.mlir b/iree/test/e2e/xla_ops/broadcast_in_dim.mlir
index 53a50f1..825e16e 100644
--- a/iree/test/e2e/xla_ops/broadcast_in_dim.mlir
+++ b/iree/test/e2e/xla_ops/broadcast_in_dim.mlir

@@ -1,7 +1,7 @@
 func @broadcast_in_dim_2D_3D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3, 4],
                                            [5, 6, 7, 8]]> : tensor<2x4xi32>
-  %res = "xla_hlo.broadcast_in_dim"(%input) {broadcast_dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<2x4xi32>) -> tensor<3x2x4xi32>
+  %res = "mhlo.broadcast_in_dim"(%input) {broadcast_dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<2x4xi32>) -> tensor<3x2x4xi32>
   check.expect_eq_const(%res, dense<[
       [[1, 2, 3, 4], [5, 6, 7, 8]],
       [[1, 2, 3, 4], [5, 6, 7, 8]],
@@ -11,7 +11,7 @@
 
 func @broadcast_in_dim_3D_scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<42> : tensor<i32>
-  %res = "xla_hlo.broadcast_in_dim"(%input) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<i32>) -> tensor<3x2x4xi32>
+  %res = "mhlo.broadcast_in_dim"(%input) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<i32>) -> tensor<3x2x4xi32>
   check.expect_eq_const(%res, dense<42> : tensor<3x2x4xi32>) : tensor<3x2x4xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/clamp.mlir b/iree/test/e2e/xla_ops/clamp.mlir
index 8c93826..1129ad2 100644
--- a/iree/test/e2e/xla_ops/clamp.mlir
+++ b/iree/test/e2e/xla_ops/clamp.mlir

@@ -2,7 +2,7 @@
   %min = iree.unfoldable_constant dense<[0, 0, 0, 0]> : tensor<4xi8>
   %val = iree.unfoldable_constant dense<[-2, 4, 8, 12]> : tensor<4xi8>
   %max = iree.unfoldable_constant dense<[10, 10, 10, 10]> : tensor<4xi8>
-  %result = "xla_hlo.clamp"(%min, %val, %max) : (tensor<4xi8>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %result = "mhlo.clamp"(%min, %val, %max) : (tensor<4xi8>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%result, dense<[0, 4, 8, 10]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -11,7 +11,7 @@
   %min = iree.unfoldable_constant dense<[0, 0, 0, 0]> : tensor<4xi16>
   %val = iree.unfoldable_constant dense<[-2, 4, 8, 12]> : tensor<4xi16>
   %max = iree.unfoldable_constant dense<[10, 10, 10, 10]> : tensor<4xi16>
-  %result = "xla_hlo.clamp"(%min, %val, %max) : (tensor<4xi16>, tensor<4xi16>, tensor<4xi16>) -> tensor<4xi16>
+  %result = "mhlo.clamp"(%min, %val, %max) : (tensor<4xi16>, tensor<4xi16>, tensor<4xi16>) -> tensor<4xi16>
   check.expect_eq_const(%result, dense<[0, 4, 8, 10]> : tensor<4xi16>) : tensor<4xi16>
   return
 }
@@ -20,7 +20,7 @@
   %min = iree.unfoldable_constant dense<[0, 0, 0, 0]> : tensor<4xi32>
   %val = iree.unfoldable_constant dense<[-2, 4, 8, 12]> : tensor<4xi32>
   %max = iree.unfoldable_constant dense<[10, 10, 10, 10]> : tensor<4xi32>
-  %result = "xla_hlo.clamp"(%min, %val, %max) : (tensor<4xi32>, tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.clamp"(%min, %val, %max) : (tensor<4xi32>, tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[0, 4, 8, 10]> : tensor<4xi32>) : tensor<4xi32>
   return
 }
@@ -29,7 +29,7 @@
   %min = iree.unfoldable_constant dense<[0.0, 0.0, 0.0, 0.0]> : tensor<4xf32>
   %val = iree.unfoldable_constant dense<[-2.0, 4.0, 8.0, 12.0]> : tensor<4xf32>
   %max = iree.unfoldable_constant dense<[10.0, 10.0, 10.0, 10.0]> : tensor<4xf32>
-  %result = "xla_hlo.clamp"(%min, %val, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.clamp"(%min, %val, %max) : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_eq_const(%result, dense<[0.0, 4.0, 8.0, 10.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/compare.mlir b/iree/test/e2e/xla_ops/compare.mlir
index d6e4c78..099670a 100644
--- a/iree/test/e2e/xla_ops/compare.mlir
+++ b/iree/test/e2e/xla_ops/compare.mlir

@@ -1,10 +1,10 @@
 func @compare_tensor() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[0, 1, 0, 1]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -12,10 +12,10 @@
 func @compare_scalar() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<5> : tensor<i32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -23,10 +23,10 @@
 func @compare_i8() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i8>
   %rhs = iree.unfoldable_constant dense<5> : tensor<i8>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i8>, tensor<i8>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i8>, tensor<i8>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -34,10 +34,10 @@
 func @compare_i16() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i16>
   %rhs = iree.unfoldable_constant dense<5> : tensor<i16>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i16>, tensor<i16>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i16>, tensor<i16>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -45,10 +45,10 @@
 func @compare_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<5> : tensor<i32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -56,10 +56,10 @@
 func @compare_i64() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i64>
   %rhs = iree.unfoldable_constant dense<5> : tensor<i64>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<i64>, tensor<i64>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -67,10 +67,10 @@
 func @compare_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f32>
   %rhs = iree.unfoldable_constant dense<5.0> : tensor<f32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -78,10 +78,10 @@
 func @compare_f64() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f64>
   %rhs = iree.unfoldable_constant dense<5.0> : tensor<f64>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<f64>, tensor<f64>) -> tensor<i1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<f64>, tensor<f64>) -> tensor<i1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<i8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<i1>, tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%output, dense<0> : tensor<i8>) : tensor<i8>
   return
 }
@@ -89,10 +89,10 @@
 func @compare_tensor_odd_length() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7]> : tensor<3xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3]> : tensor<3xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<3xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<3xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<3xi1>, tensor<3xi8>, tensor<3xi8>) -> tensor<3xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<3xi1>, tensor<3xi8>, tensor<3xi8>) -> tensor<3xi8>
   check.expect_eq_const(%output, dense<[0, 1, 0]> : tensor<3xi8>) : tensor<3xi8>
   return
 }
@@ -100,10 +100,10 @@
 func @compare_eq() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "EQ"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[0, 1, 0, 1]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -111,10 +111,10 @@
 func @compare_ne() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "NE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "NE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[1, 0, 1, 0]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -122,10 +122,10 @@
 func @compare_lt() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "LT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "LT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[1, 0, 0, 0]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -133,10 +133,10 @@
 func @compare_le() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "LE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "LE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[1, 1, 0, 1]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -144,10 +144,10 @@
 func @compare_gt() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "GT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "GT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[0, 0, 1, 0]> : tensor<4xi8>) : tensor<4xi8>
   return
 }
@@ -155,10 +155,10 @@
 func @compare_ge() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.compare"(%lhs, %rhs) {comparison_direction = "GE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %result = "mhlo.compare"(%lhs, %rhs) {comparison_direction = "GE"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %c0 = iree.unfoldable_constant dense<0> : tensor<4xi8>
   %c1 = iree.unfoldable_constant dense<1> : tensor<4xi8>
-  %output = "xla_hlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
+  %output = "mhlo.select"(%result, %c1, %c0) : (tensor<4xi1>, tensor<4xi8>, tensor<4xi8>) -> tensor<4xi8>
   check.expect_eq_const(%output, dense<[0, 1, 1, 1]> : tensor<4xi8>) : tensor<4xi8>
   return
 }

diff --git a/iree/test/e2e/xla_ops/concatenate.mlir b/iree/test/e2e/xla_ops/concatenate.mlir
index 114af81..02ac81f 100644
--- a/iree/test/e2e/xla_ops/concatenate.mlir
+++ b/iree/test/e2e/xla_ops/concatenate.mlir

@@ -3,16 +3,16 @@
   %c1 = iree.unfoldable_constant dense<[[5, 6, 7], [8, 9, 10]]> : tensor<2x3xi32>
   %c2 = iree.unfoldable_constant dense<[[11, 12], [13, 14]]> : tensor<2x2xi32>
 
-  %0 = "xla_hlo.concatenate"(%c0, %c1) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>) -> tensor<2x5xi32>
+  %0 = "mhlo.concatenate"(%c0, %c1) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>) -> tensor<2x5xi32>
   check.expect_eq_const(%0, dense<[[1, 2, 5, 6, 7], [3, 4, 8, 9, 10]]> : tensor<2x5xi32>) : tensor<2x5xi32>
 
-  %1 = "xla_hlo.concatenate"(%c1, %c0) {dimension = 1} : (tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x5xi32>
+  %1 = "mhlo.concatenate"(%c1, %c0) {dimension = 1} : (tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x5xi32>
   check.expect_eq_const(%1, dense<[[5, 6, 7, 1, 2], [8, 9, 10, 3, 4]]> : tensor<2x5xi32>) : tensor<2x5xi32>
 
-  %2 = "xla_hlo.concatenate"(%c0, %c1, %c2) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x7xi32>
+  %2 = "mhlo.concatenate"(%c0, %c1, %c2) {dimension = 1} : (tensor<2x2xi32>, tensor<2x3xi32>, tensor<2x2xi32>) -> tensor<2x7xi32>
   check.expect_eq_const(%2, dense<[[1, 2, 5, 6, 7, 11, 12], [3, 4, 8, 9, 10, 13, 14]]> : tensor<2x7xi32>) : tensor<2x7xi32>
 
-  %3 = "xla_hlo.concatenate"(%c0, %c2) {dimension = 0} : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<4x2xi32>
+  %3 = "mhlo.concatenate"(%c0, %c2) {dimension = 0} : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<4x2xi32>
   check.expect_eq_const(%3, dense<[[1, 2], [3, 4], [11, 12], [13, 14]]> : tensor<4x2xi32>) : tensor<4x2xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/constant.mlir b/iree/test/e2e/xla_ops/constant.mlir
index 9cbd098..b12e46c 100644
--- a/iree/test/e2e/xla_ops/constant.mlir
+++ b/iree/test/e2e/xla_ops/constant.mlir

@@ -1,26 +1,26 @@
 func @high_rank () attributes { iree.module.export } {
-  %dense = xla_hlo.constant dense<[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32>
+  %dense = mhlo.constant dense<[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32>
   check.expect_eq_const(%dense, dense<[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32>) : tensor<2x2x3xi32>
 
-  %splat = xla_hlo.constant dense<1> : tensor<2x2x3xi32>
+  %splat = mhlo.constant dense<1> : tensor<2x2x3xi32>
   check.expect_eq_const(%splat, dense<1> : tensor<2x2x3xi32>) : tensor<2x2x3xi32>
   return
 }
 
 func @i8() attributes { iree.module.export } {
-  %c = xla_hlo.constant dense<[1, 2]> : tensor<2xi8>
+  %c = mhlo.constant dense<[1, 2]> : tensor<2xi8>
   check.expect_eq_const(%c, dense<[1, 2]> : tensor<2xi8>) : tensor<2xi8>
   return
 }
 
 func @i32 () attributes { iree.module.export } {
-  %c = xla_hlo.constant dense<[1, 2]> : tensor<2xi32>
+  %c = mhlo.constant dense<[1, 2]> : tensor<2xi32>
   check.expect_eq_const(%c,  dense<[1, 2]> : tensor<2xi32>) : tensor<2xi32>
   return
 }
 
 func @f32 () attributes { iree.module.export } {
-  %c = xla_hlo.constant dense<[1.1, 2.1]> : tensor<2xf32>
+  %c = mhlo.constant dense<[1.1, 2.1]> : tensor<2xf32>
   check.expect_almost_eq_const(%c, dense<[1.1, 2.1]> : tensor<2xf32>) : tensor<2xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/convert.mlir b/iree/test/e2e/xla_ops/convert.mlir
index d0e9c2d..6a8606a 100644
--- a/iree/test/e2e/xla_ops/convert.mlir
+++ b/iree/test/e2e/xla_ops/convert.mlir

@@ -1,48 +1,48 @@
 func @narrow_int_i32_i8() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi32>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi8>
+  %res = "mhlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi8>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi8>) : tensor<3xi8>
   return
 }
 
 func @widen_int_i8_i32() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi8>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi8>) -> tensor<3xi32>
+  %res = "mhlo.convert"(%input) : (tensor<3xi8>) -> tensor<3xi32>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
 
 func @narrow_int_i32_i16() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi32>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi16>
+  %res = "mhlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi16>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi16>) : tensor<3xi16>
   return
 }
 
 func @widen_int_i16_i32() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi16>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi16>) -> tensor<3xi32>
+  %res = "mhlo.convert"(%input) : (tensor<3xi16>) -> tensor<3xi32>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
 
 func @narrow_int_i64_i32() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi64>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi64>) -> tensor<3xi32>
+  %res = "mhlo.convert"(%input) : (tensor<3xi64>) -> tensor<3xi32>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
 
 func @widen_int_i32_i64() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi32>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi64>
+  %res = "mhlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xi64>
   check.expect_eq_const(%res, dense<[-42, 0, 42]> : tensor<3xi64>) : tensor<3xi64>
   return
 }
 
 func @int_to_float() {
   %input = iree.unfoldable_constant dense<[-42, 0, 42]> : tensor<3xi32>
-  %res = "xla_hlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xf32>
+  %res = "mhlo.convert"(%input) : (tensor<3xi32>) -> tensor<3xf32>
   check.expect_almost_eq_const(%res, dense<[-42.0, 0.0, 42.0]> : tensor<3xf32>) : tensor<3xf32>
   return
 }
@@ -51,7 +51,7 @@
 // https://www.tensorflow.org/xla/operation_semantics#convertelementtype
 func @float_to_int() {
   %input = iree.unfoldable_constant dense<[-10.5, -4.4, 4.4, 10.5]> : tensor<4xf32>
-  %res = "xla_hlo.convert"(%input) : (tensor<4xf32>) -> tensor<4xi32>
+  %res = "mhlo.convert"(%input) : (tensor<4xf32>) -> tensor<4xi32>
   check.expect_eq_const(%res, dense<[-10, -4, 4, 10]> : tensor<4xi32>) : tensor<4xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/convolution.mlir b/iree/test/e2e/xla_ops/convolution.mlir
index b56caf8..22c5258 100644
--- a/iree/test/e2e/xla_ops/convolution.mlir
+++ b/iree/test/e2e/xla_ops/convolution.mlir

@@ -8,7 +8,7 @@
       [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
       [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
       [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
         batch_group_count = 1 : i64,
         dimension_numbers = {
           input_batch_dimension = 0 : i64,
@@ -40,7 +40,7 @@
       [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
       [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
       [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
        batch_group_count = 1 : i64,
        dimension_numbers = {
          input_batch_dimension = 0 : i64,
@@ -82,7 +82,7 @@
 //   %weights = iree.unfoldable_constant dense<[
 //       [[[1.0]], [[2.0]], [[3.0]]],
 //       [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
-//   %res = "xla_hlo.convolution"(%inputs, %weights) {
+//   %res = "mhlo.convolution"(%inputs, %weights) {
 //        batch_group_count = 1 : i64,
 //        dimension_numbers = {
 //          input_batch_dimension = 0 : i64,
@@ -172,7 +172,7 @@
        [[ 91.0,  92.0,  93.0,  94.0,  95.0,  96.0],
         [ 97.0,  98.0,  99.0, 100.0, 101.0, 102.0],
         [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
-  %res = "xla_hlo.convolution"(%inputs, %weights) {
+  %res = "mhlo.convolution"(%inputs, %weights) {
        batch_group_count = 1 : i64,
        dimension_numbers = {
          input_batch_dimension = 0 : i64,

diff --git a/iree/test/e2e/xla_ops/cosine.mlir b/iree/test/e2e/xla_ops/cosine.mlir
index f0a7120..cc14907 100644
--- a/iree/test/e2e/xla_ops/cosine.mlir
+++ b/iree/test/e2e/xla_ops/cosine.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[0.0, 1.0, 1.5, 2.0]> : tensor<4xf32>
-  %result = "xla_hlo.cosine"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.cosine"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 0.5403, 0.0707, -0.4161]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<3.0> : tensor<f32>
-  %result = "xla_hlo.cosine"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.cosine"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-0.99> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/divide.mlir b/iree/test/e2e/xla_ops/divide.mlir
index 659cc89..1f4aaaa 100644
--- a/iree/test/e2e/xla_ops/divide.mlir
+++ b/iree/test/e2e/xla_ops/divide.mlir

@@ -1,7 +1,7 @@
 func @i32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[5, 6, 7, 8]> : tensor<4xi32>
   %1 = iree.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.divide"(%0, %1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.divide"(%0, %1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[5, 3, 2, 2]> : tensor<4xi32>) : tensor<4xi32>
   return
 }
@@ -9,7 +9,7 @@
 func @f32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[5.0, 6.0, 7.0, 8.0]> : tensor<4xf32>
   %1 = iree.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.divide"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.divide"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[5.0, 3.0, 2.333333, 2.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/dot.mlir b/iree/test/e2e/xla_ops/dot.mlir
index e54f584..a492b62 100644
--- a/iree/test/e2e/xla_ops/dot.mlir
+++ b/iree/test/e2e/xla_ops/dot.mlir

@@ -1,7 +1,7 @@
 func @dot_passthrough() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[[0.3, 0.5]]> : tensor<1x2xf32>
   %rhs = iree.unfoldable_constant  dense<[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]> : tensor<2x3xf32>
-  %res = "xla_hlo.dot"(%lhs, %rhs) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x2xf32>, tensor<2x3xf32>) -> tensor<1x3xf32>
+  %res = "mhlo.dot"(%lhs, %rhs) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x2xf32>, tensor<2x3xf32>) -> tensor<1x3xf32>
   check.expect_almost_eq_const(%res, dense<[[0.23, 0.31, 0.39]]> : tensor<1x3xf32>) : tensor<1x3xf32>
   return
 }
@@ -17,7 +17,7 @@
     [15.0, 14.0, 13.0, 12.0, 11.0],
     [10.0, 09.0, 08.0, 07.0, 06.0],
     [05.0, 04.0, 03.0, 02.0, 01.0]]> : tensor<3x5xf32>
-  %res = "xla_hlo.dot"(%lhs, %rhs) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
+  %res = "mhlo.dot"(%lhs, %rhs) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<5x3xf32>, tensor<3x5xf32>) -> tensor<5x5xf32>
   check.expect_almost_eq_const(%res, dense<[
     [430.0, 388.0, 346.0, 304.0, 262.0],
     [340.0, 307.0, 274.0, 241.0, 208.0],
@@ -30,7 +30,7 @@
 func @large_matmul() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<32x1024xf32>
   %rhs = iree.unfoldable_constant dense<0.4> : tensor<1024x64xf32>
-  %res = "xla_hlo.dot"(%lhs, %rhs) : (tensor<32x1024xf32>, tensor<1024x64xf32>) -> tensor<32x64xf32>
+  %res = "mhlo.dot"(%lhs, %rhs) : (tensor<32x1024xf32>, tensor<1024x64xf32>) -> tensor<32x64xf32>
   check.expect_almost_eq_const(%res, dense<409.596> : tensor<32x64xf32>) : tensor<32x64xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/dot_general.mlir b/iree/test/e2e/xla_ops/dot_general.mlir
index 908ae61..16073eb 100644
--- a/iree/test/e2e/xla_ops/dot_general.mlir
+++ b/iree/test/e2e/xla_ops/dot_general.mlir

@@ -1,7 +1,7 @@
 func @dot_general_lower() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[[[0.3, 0.5]]]> : tensor<1x1x2xf32>
   %rhs = iree.unfoldable_constant  dense<[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]> : tensor<2x3xf32>
-  %res = "xla_hlo.dot_general"(%lhs, %rhs) {
+  %res = "mhlo.dot_general"(%lhs, %rhs) {
       dot_dimension_numbers = {
           lhs_batching_dimensions = dense<[]> : tensor<0xi64>,
           lhs_contracting_dimensions = dense<2> : tensor<1xi64>,
@@ -17,7 +17,7 @@
 func @dot_general_lower_swapped() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant  dense<[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]> : tensor<2x3xf32>
   %rhs = iree.unfoldable_constant dense<[[[0.3, 0.5]]]> : tensor<1x1x2xf32>
-  %res = "xla_hlo.dot_general"(%lhs, %rhs) {
+  %res = "mhlo.dot_general"(%lhs, %rhs) {
         dot_dimension_numbers = {
             lhs_batching_dimensions = dense<[]> : tensor<0xi64>,
             lhs_contracting_dimensions = dense<0> : tensor<1xi64>,
@@ -36,7 +36,7 @@
     [1.0, 2.0, 3.0, 4.0],
     [1.0, 2.0, 3.0, 4.0],
     [1.0, 2.0, 3.0, 4.0]]]> : tensor<1x3x4xf32>
-  %res = "xla_hlo.dot_general"(%lhs, %rhs) {
+  %res = "mhlo.dot_general"(%lhs, %rhs) {
         dot_dimension_numbers = {
             lhs_batching_dimensions = dense<0> : tensor<1xi64>,
             lhs_contracting_dimensions = dense<2> : tensor<1xi64>,
@@ -61,7 +61,7 @@
     [1.0, 2.0, 3.0, 4.0],
     [1.0, 2.0, 3.0, 4.0],
     [1.0, 2.0, 3.0, 4.0]]]> : tensor<2x3x4xf32>
-  %res = "xla_hlo.dot_general"(%lhs, %rhs) {
+  %res = "mhlo.dot_general"(%lhs, %rhs) {
         dot_dimension_numbers = {
             lhs_batching_dimensions = dense<0> : tensor<1xi64>,
             lhs_contracting_dimensions = dense<2> : tensor<1xi64>,

diff --git a/iree/test/e2e/xla_ops/exponential.mlir b/iree/test/e2e/xla_ops/exponential.mlir
index ce5c9f9..6e91326 100644
--- a/iree/test/e2e/xla_ops/exponential.mlir
+++ b/iree/test/e2e/xla_ops/exponential.mlir

@@ -1,27 +1,27 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.exponential"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.exponential"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 2.7183, 7.3891, 54.5981]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<1.0> : tensor<f32>
-  %result = "xla_hlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<2.7183> : tensor<f32>) : tensor<f32>
   return
 }
 
 func @double() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<1.0> : tensor<f64>
-  %result = "xla_hlo.exponential"(%input) : (tensor<f64>) -> tensor<f64>
+  %result = "mhlo.exponential"(%input) : (tensor<f64>) -> tensor<f64>
   check.expect_almost_eq_const(%result, dense<2.7183> : tensor<f64>) : tensor<f64>
   return
 }
 
 func @negative() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<-1.0> : tensor<f32>
-  %result = "xla_hlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.exponential"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<0.367879> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/floor.mlir b/iree/test/e2e/xla_ops/floor.mlir
index 11ce5a5..dbb1b25 100644
--- a/iree/test/e2e/xla_ops/floor.mlir
+++ b/iree/test/e2e/xla_ops/floor.mlir

@@ -1,20 +1,20 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[0.0, 1.1, 2.5, 4.9]> : tensor<4xf32>
-  %result = "xla_hlo.floor"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.floor"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf32>): tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<101.3> : tensor<f32>
-  %result = "xla_hlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<101.0> : tensor<f32>): tensor<f32>
   return
 }
 
 func @negative() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<-1.1> : tensor<f32>
-  %result = "xla_hlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>): tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/gather.mlir b/iree/test/e2e/xla_ops/gather.mlir
index b417fab..c6de261 100644
--- a/iree/test/e2e/xla_ops/gather.mlir
+++ b/iree/test/e2e/xla_ops/gather.mlir

@@ -6,7 +6,7 @@
     [[16, 17, 18, 19, 20]],
     [[21, 22, 23, 24, 25]]]> : tensor<5x1x5xi32>
   %start_indices = iree.unfoldable_constant dense<2> : tensor<i64>
-  %res = "xla_hlo.gather"(%input, %start_indices) {
+  %res = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,

diff --git a/iree/test/e2e/xla_ops/gather_concat.mlir b/iree/test/e2e/xla_ops/gather_concat.mlir
index 7c2f794..c781a4d 100644
--- a/iree/test/e2e/xla_ops/gather_concat.mlir
+++ b/iree/test/e2e/xla_ops/gather_concat.mlir

@@ -3,7 +3,7 @@
     [[05, 06, 07, 08]],
     [[09, 10, 11, 12]]]> : tensor<2x1x4xi32>
   %start_indices = iree.unfoldable_constant dense<0> : tensor<i64>
-  %gath = "xla_hlo.gather"(%input, %start_indices) {
+  %gath = "mhlo.gather"(%input, %start_indices) {
     dimension_numbers = {
       collapsed_slice_dims = dense<0> : tensor<1xi64>,
       index_vector_dim = 0 : i64,
@@ -12,7 +12,7 @@
       slice_sizes = dense<[1, 1, 4]> : tensor<3xi64>
   } : (tensor<2x1x4xi32>, tensor<i64>) -> tensor<1x4xi32>
   %suffix = iree.unfoldable_constant dense<[[1, 2]]> : tensor<1x2xi32>
-  %res = "xla_hlo.concatenate"(%gath, %suffix) {dimension = 1 : i64} : (tensor<1x4xi32>, tensor<1x2xi32>) -> tensor<1x6xi32>
+  %res = "mhlo.concatenate"(%gath, %suffix) {dimension = 1 : i64} : (tensor<1x4xi32>, tensor<1x2xi32>) -> tensor<1x6xi32>
   check.expect_eq_const(%res, dense<[[5, 6, 7, 8, 1, 2]]> : tensor<1x6xi32>) : tensor<1x6xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/log.mlir b/iree/test/e2e/xla_ops/log.mlir
index 5764189..c5c2ebc 100644
--- a/iree/test/e2e/xla_ops/log.mlir
+++ b/iree/test/e2e/xla_ops/log.mlir

@@ -1,20 +1,20 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.log"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.log"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[0.0, 0.693147, 1.09861, 1.38629]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<4.0> : tensor<f32>
-  %result = "xla_hlo.log"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.log"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<1.3863> : tensor<f32>) : tensor<f32>
   return
 }
 
 func @double() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<4.0> : tensor<f64>
-  %result = "xla_hlo.log"(%input) : (tensor<f64>) -> tensor<f64>
+  %result = "mhlo.log"(%input) : (tensor<f64>) -> tensor<f64>
   check.expect_almost_eq_const(%result, dense<1.3863> : tensor<f64>) : tensor<f64>
   return
 }

diff --git a/iree/test/e2e/xla_ops/maximum.mlir b/iree/test/e2e/xla_ops/maximum.mlir
index 127abef..0f31705 100644
--- a/iree/test/e2e/xla_ops/maximum.mlir
+++ b/iree/test/e2e/xla_ops/maximum.mlir

@@ -1,7 +1,7 @@
 func @tensor_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 6, 7, 8]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 6, 3, 8]> : tensor<4xi32>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[5, 6, 7, 8]> : tensor<4xi32>) : tensor<4xi32>
   return
 }
@@ -9,7 +9,7 @@
 func @tensor_odd_dim() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 6, 7]> : tensor<3xi32>
   %rhs = iree.unfoldable_constant dense<[5, 6, 3]> : tensor<3xi32>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
   check.expect_eq_const(%result, dense<[5, 6,7]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
@@ -17,7 +17,7 @@
 func @scalar_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i32>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<2> : tensor<i32>) : tensor<i32>
   return
 }
@@ -25,7 +25,7 @@
 func @negative_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<-2> : tensor<i32>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<1> : tensor<i32>) : tensor<i32>
   return
 }
@@ -33,7 +33,7 @@
 func @i8() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i8>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i8>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<i8>, tensor<i8>) -> tensor<i8>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%result, dense<2> : tensor<i8>) : tensor<i8>
   return
 }
@@ -41,7 +41,7 @@
 func @i16() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i16>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i16>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<i16>, tensor<i16>) -> tensor<i16>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<i16>, tensor<i16>) -> tensor<i16>
   check.expect_eq_const(%result, dense<2> : tensor<i16>) : tensor<i16>
   return
 }
@@ -49,7 +49,7 @@
 func @i64() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i64>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i64>
-  %result = "xla_hlo.maximum"(%lhs, %rhs) : (tensor<i64>, tensor<i64>) -> tensor<i64>
+  %result = "mhlo.maximum"(%lhs, %rhs) : (tensor<i64>, tensor<i64>) -> tensor<i64>
   check.expect_eq_const(%result, dense<2> : tensor<i64>) : tensor<i64>
   return
 }
@@ -57,7 +57,7 @@
 func @tensor_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1.0, 2.0, 7.0, 4.0]> : tensor<4xf32>
   %rhs = iree.unfoldable_constant dense<[5.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
@@ -65,7 +65,7 @@
 func @scalar_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f32>
   %rhs = iree.unfoldable_constant dense<2.0> : tensor<f32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<1.0> : tensor<f32>) : tensor<f32>
   return
 }
@@ -73,7 +73,7 @@
 func @double() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f64>
   %rhs = iree.unfoldable_constant dense<2.0> : tensor<f64>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f64>, tensor<f64>) -> tensor<f64>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f64>, tensor<f64>) -> tensor<f64>
   check.expect_almost_eq_const(%result, dense<1.0> : tensor<f64>) : tensor<f64>
   return
 }
@@ -81,7 +81,7 @@
 func @negative_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f32>
   %rhs = iree.unfoldable_constant dense<-2.0> : tensor<f32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/minimum.mlir b/iree/test/e2e/xla_ops/minimum.mlir
index 73da22d..3837ca2 100644
--- a/iree/test/e2e/xla_ops/minimum.mlir
+++ b/iree/test/e2e/xla_ops/minimum.mlir

@@ -1,7 +1,7 @@
 func @tensor_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3, 4]> : tensor<4xi32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[1, 2, 3, 4]> : tensor<4xi32>) : tensor<4xi32>
   return
 }
@@ -9,7 +9,7 @@
 func @tensor_odd_dim() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1, 2, 7]> : tensor<3xi32>
   %rhs = iree.unfoldable_constant dense<[5, 2, 3]> : tensor<3xi32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
   check.expect_eq_const(%result, dense<[1, 2, 3]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
@@ -17,7 +17,7 @@
 func @scalar_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<1> : tensor<i32>) : tensor<i32>
   return
 }
@@ -25,7 +25,7 @@
 func @negative_i32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i32>
   %rhs = iree.unfoldable_constant dense<-2> : tensor<i32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<-2> : tensor<i32>) : tensor<i32>
   return
 }
@@ -33,7 +33,7 @@
 func @i8() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i8>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i8>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<i8>, tensor<i8>) -> tensor<i8>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<i8>, tensor<i8>) -> tensor<i8>
   check.expect_eq_const(%result, dense<1> : tensor<i8>) : tensor<i8>
   return
 }
@@ -41,7 +41,7 @@
 func @i16() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i16>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i16>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<i16>, tensor<i16>) -> tensor<i16>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<i16>, tensor<i16>) -> tensor<i16>
   check.expect_eq_const(%result, dense<1> : tensor<i16>) : tensor<i16>
   return
 }
@@ -49,7 +49,7 @@
 func @i64() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1> : tensor<i64>
   %rhs = iree.unfoldable_constant dense<2> : tensor<i64>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<i64>, tensor<i64>) -> tensor<i64>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<i64>, tensor<i64>) -> tensor<i64>
   check.expect_eq_const(%result, dense<1> : tensor<i64>) : tensor<i64>
   return
 }
@@ -57,7 +57,7 @@
 func @tensor_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<[1.0, 2.0, 7.0, 4.0]> : tensor<4xf32>
   %rhs = iree.unfoldable_constant dense<[5.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
@@ -65,7 +65,7 @@
 func @scalar_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f32>
   %rhs = iree.unfoldable_constant dense<2.0> : tensor<f32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<1.0> : tensor<f32>) : tensor<f32>
   return
 }
@@ -73,7 +73,7 @@
 func @double() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f64>
   %rhs = iree.unfoldable_constant dense<2.0> : tensor<f64>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f64>, tensor<f64>) -> tensor<f64>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f64>, tensor<f64>) -> tensor<f64>
   check.expect_almost_eq_const(%result, dense<1.0> : tensor<f64>) : tensor<f64>
   return
 }
@@ -81,7 +81,7 @@
 func @negative_f32() attributes { iree.module.export } {
   %lhs = iree.unfoldable_constant dense<1.0> : tensor<f32>
   %rhs = iree.unfoldable_constant dense<-2.0> : tensor<f32>
-  %result = "xla_hlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.minimum"(%lhs, %rhs) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/multiply.mlir b/iree/test/e2e/xla_ops/multiply.mlir
index c8ae27f..aac03b4 100644
--- a/iree/test/e2e/xla_ops/multiply.mlir
+++ b/iree/test/e2e/xla_ops/multiply.mlir

@@ -1,6 +1,6 @@
 func @multiply () attributes { iree.module.export } {
   %c2 = iree.unfoldable_constant dense<2.0> : tensor<f32>
-  %res = "xla_hlo.multiply"(%c2, %c2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %res = "mhlo.multiply"(%c2, %c2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%res, dense<4.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/negate.mlir b/iree/test/e2e/xla_ops/negate.mlir
index c98a6cc..a49380b 100644
--- a/iree/test/e2e/xla_ops/negate.mlir
+++ b/iree/test/e2e/xla_ops/negate.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[-1.0, -2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.negate"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.negate"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 2.0, -3.0, -4.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<-4.0> : tensor<f32>
-  %result = "xla_hlo.negate"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.negate"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<4.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/pad.mlir b/iree/test/e2e/xla_ops/pad.mlir
index 8ce0fa8..7f6df37 100644
--- a/iree/test/e2e/xla_ops/pad.mlir
+++ b/iree/test/e2e/xla_ops/pad.mlir

@@ -1,7 +1,7 @@
 func @pad_test() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.pad"(%input, %c0) {
+  %res = "mhlo.pad"(%input, %c0) {
     edge_padding_low = dense<[0, 1]> : tensor<2xi64>,
     edge_padding_high = dense<[1, 5]> : tensor<2xi64>,
     interior_padding = dense<0> : tensor<2xi64>
@@ -16,7 +16,7 @@
 func @pad_no_op() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32>
   %c0 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.pad"(%input, %c0) {edge_padding_high = dense<[0, 0]> : tensor<2xi64>, edge_padding_low = dense<[0, 0]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<2x3xi32>
+  %res = "mhlo.pad"(%input, %c0) {edge_padding_high = dense<[0, 0]> : tensor<2xi64>, edge_padding_low = dense<[0, 0]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<2x3xi32>
   check.expect_eq(%res, %input) : tensor<2x3xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/reduce.mlir b/iree/test/e2e/xla_ops/reduce.mlir
index ae695d7..c64db8e 100644
--- a/iree/test/e2e/xla_ops/reduce.mlir
+++ b/iree/test/e2e/xla_ops/reduce.mlir

@@ -2,10 +2,10 @@
 func @reduce_sum_1x10xi32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]> : tensor<1x10xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xi32>, tensor<i32>) -> tensor<1xi32>
   check.expect_eq_const(%res, dense<55> : tensor<1xi32>) : tensor<1xi32>
   return
@@ -15,10 +15,10 @@
 func @reduce_max_1x10xi32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]> : tensor<1x10xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.maximum"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.maximum"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xi32>, tensor<i32>) -> tensor<1xi32>
   check.expect_eq_const(%res, dense<10> : tensor<1xi32>) : tensor<1xi32>
   return
@@ -28,10 +28,10 @@
 func @reduce_min_5x1x1xi32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[[1]],[[2]],[[3]],[[4]],[[5]]]> : tensor<5x1x1xi32>
   %1 = iree.unfoldable_constant dense<999> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.minimum"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.minimum"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xi32>, tensor<i32>) -> tensor<5xi32>
   check.expect_eq_const(%res, dense<[1, 2, 3, 4, 5]> : tensor<5xi32>) : tensor<5xi32>
   return
@@ -46,10 +46,10 @@
       [1, 2, 3],
       [4, 5, 6]]> : tensor<2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3xi32>
   check.expect_eq_const(%res, dense<[5, 7, 9]> : tensor<3xi32>) : tensor<3xi32>
   return
@@ -60,10 +60,10 @@
       [1, 2, 3],
       [4, 5, 6]]> : tensor<2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<2xi32>
   check.expect_eq_const(%res, dense<[6, 15]> : tensor<2xi32>) : tensor<2xi32>
   return
@@ -76,10 +76,10 @@
       [[1, 2, 3], [4, 5, 6]],
       [[1, 2, 3], [4, 5, 6]]]> : tensor<4x2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4x2x3xi32>, tensor<i32>) -> tensor<2x3xi32>
   check.expect_eq_const(%res, dense<[[4, 8, 12],[16, 20, 24]]> : tensor<2x3xi32>) : tensor<2x3xi32>
   return
@@ -92,10 +92,10 @@
     [[1, 2, 3], [4, 5, 6]],
     [[1, 2, 3], [4, 5, 6]]]> : tensor<4x2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<2> : tensor<1xi64>} : (tensor<4x2x3xi32>, tensor<i32>) -> tensor<4x2xi32>
   check.expect_eq_const(%res, dense<[[6, 15],[6, 15],[6, 15],[6, 15]]> : tensor<4x2xi32>) : tensor<4x2xi32>
   return
@@ -108,10 +108,10 @@
       [[1, 2, 3], [4, 5, 6]],
       [[1, 2, 3], [4, 5, 6]]]> : tensor<4x2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x2x3xi32>, tensor<i32>) -> tensor<3xi32>
   check.expect_eq_const(%res, dense<[20, 28, 36]> : tensor<3xi32>) : tensor<3xi32>
   return
@@ -124,10 +124,10 @@
       [[1, 2, 3], [4, 5, 6]],
       [[1, 2, 3], [4, 5, 6]]]> : tensor<4x2x3xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<4x2x3xi32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%res, dense<84> : tensor<i32>) : tensor<i32>
   return
@@ -137,10 +137,10 @@
 func @reduce_sum_1x10xf32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]> : tensor<1x10xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
   check.expect_almost_eq_const(%res, dense<55.0> : tensor<1xf32>) : tensor<1xf32>
   return
@@ -150,11 +150,11 @@
 func @reduce_max_1x10xf32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]> : tensor<1x10xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1)
+  %res = "mhlo.reduce"(%0, %1)
   ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-      %3 = "xla_hlo.maximum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = "mhlo.maximum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
   })
   {dimensions = dense<1> : tensor<1xi64>} : (tensor<1x10xf32>, tensor<f32>) -> tensor<1xf32>
   check.expect_almost_eq_const(%res, dense<10.0> : tensor<1xf32>) : tensor<1xf32>
@@ -165,10 +165,10 @@
 func @reduce_min_5x1x1xf32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[[1.0]],[[2.0]],[[3.0]],[[4.0]],[[5.0]]]> : tensor<5x1x1xf32>
   %1 = iree.unfoldable_constant dense<999.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-      %3 = "xla_hlo.minimum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-      "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+      %3 = "mhlo.minimum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+      "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<[1, 2]> : tensor<2xi64>} : (tensor<5x1x1xf32>, tensor<f32>) -> tensor<5xf32>
   check.expect_almost_eq_const(%res, dense<[1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<5xf32>) : tensor<5xf32>
   return
@@ -180,10 +180,10 @@
 func @reduce_sum_2x3xf32_dim0() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<2x3xf32>, tensor<f32>) -> tensor<3xf32>
   check.expect_almost_eq_const(%res, dense<[5.0, 7.0, 9.0]> : tensor<3xf32>) : tensor<3xf32>
   return
@@ -192,10 +192,10 @@
 func @reduce_sum_2x3xf32_dim1() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x3xf32>, tensor<f32>) -> tensor<2xf32>
   check.expect_almost_eq_const(%res, dense<[6.0, 15.0]> : tensor<2xf32>) : tensor<2xf32>
   return
@@ -208,10 +208,10 @@
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]> : tensor<4x2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4x2x3xf32>, tensor<f32>) -> tensor<2x3xf32>
   check.expect_almost_eq_const(%res, dense<[[4.0, 8.0, 12.0],[16.0, 20.0, 24.0]]> : tensor<2x3xf32>) : tensor<2x3xf32>
   return
@@ -224,10 +224,10 @@
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]> : tensor<4x2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x2x3xf32>, tensor<f32>) -> tensor<4x3xf32>
   check.expect_almost_eq_const(%res, dense<[
       [5.0, 7.0, 9.0],
@@ -244,10 +244,10 @@
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]> : tensor<4x2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<2> : tensor<1xi64>} : (tensor<4x2x3xf32>, tensor<f32>) -> tensor<4x2xf32>
   check.expect_almost_eq_const(%res, dense<[
       [6.0, 15.0],
@@ -264,10 +264,10 @@
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]> : tensor<4x2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x2x3xf32>, tensor<f32>) -> tensor<3xf32>
   check.expect_almost_eq_const(%res, dense<[20.0, 28.0, 36.0]> : tensor<3xf32>) : tensor<3xf32>
   return
@@ -280,10 +280,10 @@
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
       [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]> : tensor<4x2x3xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce"(%0, %1) ( {
+  %res = "mhlo.reduce"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<4x2x3xf32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%res, dense<84.0> : tensor<f32>) : tensor<f32>
   return

diff --git a/iree/test/e2e/xla_ops/reduce_window.mlir b/iree/test/e2e/xla_ops/reduce_window.mlir
index f5c85d1..99ae9c9 100644
--- a/iree/test/e2e/xla_ops/reduce_window.mlir
+++ b/iree/test/e2e/xla_ops/reduce_window.mlir

@@ -4,10 +4,10 @@
                                        [13, 14, 15, 16, 17, 18],
                                        [19, 20, 21, 22, 23, 24]]> : tensor<4x6xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce_window"(%0, %1) ( {
+  %res = "mhlo.reduce_window"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {window_dimensions = dense<[2, 3]> : tensor<2xi64>,
       window_strides = dense<[2, 3]> : tensor<2xi64>} : (tensor<4x6xi32>, tensor<i32>) -> tensor<2x2xi32>
   check.expect_eq_const(%res, dense<[[30, 48],[102, 120]]> : tensor<2x2xi32>) : tensor<2x2xi32>
@@ -20,10 +20,10 @@
                                        [13, 14, 15, 16, 17, 18],
                                        [19, 20, 21, 22, 23, 24]]> : tensor<4x6xi32>
   %1 = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.reduce_window"(%0, %1) ( {
+  %res = "mhlo.reduce_window"(%0, %1) ( {
   ^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>):   // no predecessors
-    %3 = "xla_hlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
-    "xla_hlo.return"(%3) : (tensor<i32>) -> ()
+    %3 = "mhlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    "mhlo.return"(%3) : (tensor<i32>) -> ()
   }) {window_dimensions = dense<[2, 3]> : tensor<2xi64>,
       window_strides = dense<[1, 1]> : tensor<2xi64>} : (tensor<4x6xi32>, tensor<i32>) -> tensor<3x4xi32>
   check.expect_eq_const(%res, dense<[
@@ -39,10 +39,10 @@
                                        [13.0, 14.0, 15.0, 16.0, 17.0, 18.0],
                                        [19.0, 20.0, 21.0, 22.0, 23.0, 24.0]]> : tensor<4x6xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce_window"(%0, %1) ( {
+  %res = "mhlo.reduce_window"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.maximum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.maximum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {window_dimensions = dense<[2, 3]> : tensor<2xi64>,
       window_strides = dense<[2, 3]> : tensor<2xi64>} : (tensor<4x6xf32>, tensor<f32>) -> tensor<2x2xf32>
   check.expect_almost_eq_const(%res, dense<[[9.0, 12.0], [21.0, 24.0]]> : tensor<2x2xf32>) : tensor<2x2xf32>
@@ -55,10 +55,10 @@
                                        [-13.0, -14.0, -15.0, -16.0, -17.0, -18.0],
                                        [-19.0, -20.0, -21.0, -22.0, -23.0, -24.0]]> : tensor<4x6xf32>
   %1 = iree.unfoldable_constant dense<0.0> : tensor<f32>
-  %res = "xla_hlo.reduce_window"(%0, %1) ( {
+  %res = "mhlo.reduce_window"(%0, %1) ( {
   ^bb0(%arg0: tensor<f32>, %arg1: tensor<f32>):   // no predecessors
-    %3 = "xla_hlo.minimum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    "xla_hlo.return"(%3) : (tensor<f32>) -> ()
+    %3 = "mhlo.minimum"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+    "mhlo.return"(%3) : (tensor<f32>) -> ()
   }) {window_dimensions = dense<[2, 3]> : tensor<2xi64>,
       window_strides = dense<[2, 3]> : tensor<2xi64>} : (tensor<4x6xf32>, tensor<f32>) -> tensor<2x2xf32>
   check.expect_almost_eq_const(%res, dense<[[-9.0, -12.0], [-21.0, -24.0]]> : tensor<2x2xf32>) : tensor<2x2xf32>

diff --git a/iree/test/e2e/xla_ops/remainder.mlir b/iree/test/e2e/xla_ops/remainder.mlir
index a04df64..6142705 100644
--- a/iree/test/e2e/xla_ops/remainder.mlir
+++ b/iree/test/e2e/xla_ops/remainder.mlir

@@ -1,7 +1,7 @@
 func @scalar() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<16.0> : tensor<f32>
   %input2 = iree.unfoldable_constant dense<7.0> : tensor<f32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<2.0> : tensor<f32>) : tensor<f32>
   return
 }
@@ -9,7 +9,7 @@
 func @tensor() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<[16.0, 17.0, 18.0]> : tensor<3xf32>
   %input2 = iree.unfoldable_constant dense<[7.0, 8.0, 9.0]> : tensor<3xf32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
   check.expect_almost_eq_const(%result, dense<[2.0, 1.0, 0.0]> : tensor<3xf32>) : tensor<3xf32>
   return
 }
@@ -17,7 +17,7 @@
 func @negative_den() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<16.0> : tensor<f32>
   %input2 = iree.unfoldable_constant dense<-7.0> : tensor<f32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<2.0> : tensor<f32>) : tensor<f32>
   return
 }
@@ -25,7 +25,7 @@
 func @negative_num() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<-16.0> : tensor<f32>
   %input2 = iree.unfoldable_constant dense<7.0> : tensor<f32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<f32>, tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<-2.0> : tensor<f32>) : tensor<f32>
   return
 }
@@ -33,7 +33,7 @@
 func @scalar_int() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<16> : tensor<i32>
   %input2 = iree.unfoldable_constant dense<7> : tensor<i32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<2> : tensor<i32>) : tensor<i32>
   return
 }
@@ -41,7 +41,7 @@
 func @tensor_int() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<[16, 17, 18]> : tensor<3xi32>
   %input2 = iree.unfoldable_constant dense<[7, 8, 9]> : tensor<3xi32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<3xi32>, tensor<3xi32>) -> tensor<3xi32>
   check.expect_eq_const(%result, dense<[2, 1, 0]> : tensor<3xi32>) : tensor<3xi32>
   return
 }
@@ -49,7 +49,7 @@
 func @negative_den_int() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<16> : tensor<i32>
   %input2 = iree.unfoldable_constant dense<-7> : tensor<i32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<2> : tensor<i32>) : tensor<i32>
   return
 }
@@ -57,7 +57,7 @@
 func @negative_num_int() attributes { iree.module.export } {
   %input1 = iree.unfoldable_constant dense<-16> : tensor<i32>
   %input2 = iree.unfoldable_constant dense<7> : tensor<i32>
-  %result = "xla_hlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %result = "mhlo.remainder"(%input1, %input2) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%result, dense<-2> : tensor<i32>) : tensor<i32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/reshape.mlir b/iree/test/e2e/xla_ops/reshape.mlir
index 0b6ca1d..9e61153 100644
--- a/iree/test/e2e/xla_ops/reshape.mlir
+++ b/iree/test/e2e/xla_ops/reshape.mlir

@@ -1,6 +1,6 @@
 func @reshape_1D_2D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]> : tensor<12xi32>
-  %result = "xla_hlo.reshape"(%input) : (tensor<12xi32>) -> tensor<3x4xi32>
+  %result = "mhlo.reshape"(%input) : (tensor<12xi32>) -> tensor<3x4xi32>
   check.expect_eq_const(%result, dense<[
       [1, 2, 3, 4],
       [5, 6, 7, 8],
@@ -10,7 +10,7 @@
 
 func @reshape_1D_3D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]> : tensor<12xi32>
-  %result = "xla_hlo.reshape"(%input) : (tensor<12xi32>) -> tensor<2x2x3xi32>
+  %result = "mhlo.reshape"(%input) : (tensor<12xi32>) -> tensor<2x2x3xi32>
   check.expect_eq_const(%result, dense<[
       [[1, 2, 3], [4, 5, 6]],
       [[7, 8, 9], [10, 11, 12]]]> : tensor<2x2x3xi32>) : tensor<2x2x3xi32>
@@ -19,14 +19,14 @@
 
 func @reshape_2D_3D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]> : tensor<2x6xi32>
-  %result = "xla_hlo.reshape"(%input) : (tensor<2x6xi32>) -> tensor<2x1x6xi32>
+  %result = "mhlo.reshape"(%input) : (tensor<2x6xi32>) -> tensor<2x1x6xi32>
   check.expect_eq_const(%result, dense<[[[1, 2, 3, 4, 5, 6]], [[7, 8, 9, 10, 11, 12]]]> : tensor<2x1x6xi32>) : tensor<2x1x6xi32>
   return
 }
 
 func @reshape_3D_1D() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[[1, 2, 3, 4, 5, 6]], [[7, 8, 9, 10, 11, 12]]]> : tensor<2x1x6xi32>
-  %result = "xla_hlo.reshape"(%input) : (tensor<2x1x6xi32>) -> tensor<2x6xi32>
+  %result = "mhlo.reshape"(%input) : (tensor<2x1x6xi32>) -> tensor<2x6xi32>
   check.expect_eq_const(%result, dense<[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]> : tensor<2x6xi32>) : tensor<2x6xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/reverse.mlir b/iree/test/e2e/xla_ops/reverse.mlir
index b78a61f..37a55fd 100644
--- a/iree/test/e2e/xla_ops/reverse.mlir
+++ b/iree/test/e2e/xla_ops/reverse.mlir

@@ -1,19 +1,19 @@
 func @xla_reverse() attributes { iree.module.export } {
   %t1 = iree.unfoldable_constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32>
 
-  %dim0 = "xla_hlo.reverse"(%t1) {dimensions = dense<0> : tensor<1xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  %dim0 = "mhlo.reverse"(%t1) {dimensions = dense<0> : tensor<1xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
   check.expect_almost_eq_const(
       %dim0,
       dense<[[4.0, 5.0, 6.0], [1.0, 2.0, 3.0]]> : tensor<2x3xf32>
   ) : tensor<2x3xf32>
 
-  %dim1 = "xla_hlo.reverse"(%t1) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  %dim1 = "mhlo.reverse"(%t1) {dimensions = dense<1> : tensor<1xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
   check.expect_almost_eq_const(
       %dim1,
       dense<[[3.0, 2.0, 1.0], [6.0, 5.0, 4.0]]> : tensor<2x3xf32>
   ) : tensor<2x3xf32>
 
-  %both_dims = "xla_hlo.reverse"(%t1) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
+  %both_dims = "mhlo.reverse"(%t1) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<2x3xf32>) -> tensor<2x3xf32>
   check.expect_almost_eq_const(
       %both_dims,
       dense<[[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]]> : tensor<2x3xf32>

diff --git a/iree/test/e2e/xla_ops/rsqrt.mlir b/iree/test/e2e/xla_ops/rsqrt.mlir
index 5a71938..50c4b3a 100644
--- a/iree/test/e2e/xla_ops/rsqrt.mlir
+++ b/iree/test/e2e/xla_ops/rsqrt.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.rsqrt"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.rsqrt"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 0.707107, 0.57735, 0.5]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<16.0> : tensor<f32>
-  %result = "xla_hlo.rsqrt"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.rsqrt"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<0.25> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/select.mlir b/iree/test/e2e/xla_ops/select.mlir
index 564d55f..bc77c00 100644
--- a/iree/test/e2e/xla_ops/select.mlir
+++ b/iree/test/e2e/xla_ops/select.mlir

@@ -2,10 +2,10 @@
   // TODO(b/132205704) support i1 in constants and function signatures.
   %input = iree.unfoldable_constant dense<[1, 0, 1, 0]> : tensor<4xi32>
   %zeros = iree.unfoldable_constant dense<0> : tensor<4xi32>
-  %cond = "xla_hlo.compare"(%input, %zeros) {comparison_direction = "GT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
+  %cond = "mhlo.compare"(%input, %zeros) {comparison_direction = "GT"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi1>
   %lhs = iree.unfoldable_constant dense<[1, 2, 3, 4]> : tensor<4xi32>
   %rhs = iree.unfoldable_constant dense<[5, 6, 7, 8]> : tensor<4xi32>
-  %result = "xla_hlo.select"(%cond, %lhs, %rhs) : (tensor<4xi1>, tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.select"(%cond, %lhs, %rhs) : (tensor<4xi1>, tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[1,6, 3, 8]> : tensor<4xi32>) : tensor<4xi32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/sine.mlir b/iree/test/e2e/xla_ops/sine.mlir
index 799c495..e5bbf5a 100644
--- a/iree/test/e2e/xla_ops/sine.mlir
+++ b/iree/test/e2e/xla_ops/sine.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[0.0, 1.0, 1.5, 2.0]> : tensor<4xf32>
-  %result = "xla_hlo.sine"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.sine"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[0.0, 0.8415, 0.9975, 0.9093]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<3.0> : tensor<f32>
-  %result = "xla_hlo.sine"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.sine"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<0.14112> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/slice.mlir b/iree/test/e2e/xla_ops/slice.mlir
index 8269987..a4cd572 100644
--- a/iree/test/e2e/xla_ops/slice.mlir
+++ b/iree/test/e2e/xla_ops/slice.mlir

@@ -3,7 +3,7 @@
     [01, 02, 03, 04],
     [05, 06, 07, 08],
     [09, 10, 11, 12]]> : tensor<3x4xi32>
-  %result = "xla_hlo.slice"(%input) {
+  %result = "mhlo.slice"(%input) {
     start_indices = dense<[0, 0]> : tensor<2xi64>,
     limit_indices = dense<[3, 4]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -20,7 +20,7 @@
     [01, 02, 03, 04],
     [05, 06, 07, 08],
     [09, 10, 11, 12]]> : tensor<3x4xi32>
-  %result = "xla_hlo.slice"(%input) {
+  %result = "mhlo.slice"(%input) {
     start_indices = dense<[1, 0]> : tensor<2xi64>,
     limit_indices = dense<[2, 4]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -34,7 +34,7 @@
     [01, 02, 03, 04],
     [05, 06, 07, 08],
     [09, 10, 11, 12]]> : tensor<3x4xi32>
-  %result = "xla_hlo.slice"(%input) {
+  %result = "mhlo.slice"(%input) {
     start_indices = dense<[1, 1]> : tensor<2xi64>,
     limit_indices = dense<[2, 3]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>
@@ -48,7 +48,7 @@
     [01, 02, 03, 04],
     [05, 06, 07, 08],
     [09, 10, 11, 12]]> : tensor<3x4xi32>
-  %result = "xla_hlo.slice"(%input) {
+  %result = "mhlo.slice"(%input) {
     start_indices = dense<[1, 0]> : tensor<2xi64>,
     limit_indices = dense<[3, 4]> : tensor<2xi64>,
     strides = dense<1> : tensor<2xi64>

diff --git a/iree/test/e2e/xla_ops/sqrt.mlir b/iree/test/e2e/xla_ops/sqrt.mlir
index f6ee3c5..b536efe 100644
--- a/iree/test/e2e/xla_ops/sqrt.mlir
+++ b/iree/test/e2e/xla_ops/sqrt.mlir

@@ -1,13 +1,13 @@
 func @tensor() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
-  %result = "xla_hlo.sqrt"(%input) : (tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.sqrt"(%input) : (tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[1.0, 1.4142, 1.7321, 2.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }
 
 func @scalar() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<16.0> : tensor<f32>
-  %result = "xla_hlo.sqrt"(%input) : (tensor<f32>) -> tensor<f32>
+  %result = "mhlo.sqrt"(%input) : (tensor<f32>) -> tensor<f32>
   check.expect_almost_eq_const(%result, dense<4.0> : tensor<f32>) : tensor<f32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/subtract.mlir b/iree/test/e2e/xla_ops/subtract.mlir
index ed419f1..bfa4b28 100644
--- a/iree/test/e2e/xla_ops/subtract.mlir
+++ b/iree/test/e2e/xla_ops/subtract.mlir

@@ -1,7 +1,7 @@
 func @i32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[5, 6, 3, 4]> : tensor<4xi32>
   %1 = iree.unfoldable_constant dense<[1, 4, 7, 6]> : tensor<4xi32>
-  %result = "xla_hlo.subtract"(%0, %1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
+  %result = "mhlo.subtract"(%0, %1) : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
   check.expect_eq_const(%result, dense<[4, 2, -4, -2]> : tensor<4xi32>) : tensor<4xi32>
   return
 }
@@ -9,7 +9,7 @@
 func @f32() attributes { iree.module.export } {
   %0 = iree.unfoldable_constant dense<[5.0, 6.0, 3.0, 4.0]> : tensor<4xf32>
   %1 = iree.unfoldable_constant dense<[1.0, 4.0, 7.0, 6.0]> : tensor<4xf32>
-  %result = "xla_hlo.subtract"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  %result = "mhlo.subtract"(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
   check.expect_almost_eq_const(%result, dense<[4.0, 2.0, -4.0, -2.0]> : tensor<4xf32>) : tensor<4xf32>
   return
 }

diff --git a/iree/test/e2e/xla_ops/tanh.mlir b/iree/test/e2e/xla_ops/tanh.mlir
index de50fca..dc181d7 100644
--- a/iree/test/e2e/xla_ops/tanh.mlir
+++ b/iree/test/e2e/xla_ops/tanh.mlir

@@ -2,7 +2,7 @@
   %input = iree.unfoldable_constant dense<
       [[-100.0, -5.0, -0.5,   1.0],
        [   1.2,  2.0,  3.0, 100.0]]> : tensor<2x4xf32>
-  %result = "xla_hlo.tanh"(%input) : (tensor<2x4xf32>) -> tensor<2x4xf32>
+  %result = "mhlo.tanh"(%input) : (tensor<2x4xf32>) -> tensor<2x4xf32>
   check.expect_almost_eq_const(%result, dense<
       [[-1.0000, -0.9999, -0.4622, 0.7616],
        [ 0.8337,  0.9640,  0.9951, 1.0000]]> : tensor<2x4xf32>) : tensor<2x4xf32>

diff --git a/iree/test/e2e/xla_ops/torch_index_select.mlir b/iree/test/e2e/xla_ops/torch_index_select.mlir
index d4d46aa..a947d41 100644
--- a/iree/test/e2e/xla_ops/torch_index_select.mlir
+++ b/iree/test/e2e/xla_ops/torch_index_select.mlir

@@ -6,7 +6,7 @@
     [[16, 17, 18, 19, 20]],
     [[21, 22, 23, 24, 25]]]> : tensor<5x1x5xi32>
   %indices = iree.unfoldable_constant dense<[0, 2]> : tensor<2xi32>
-  %res = "xla_hlo.torch_index_select"(%input, %indices) {
+  %res = "mhlo.torch_index_select"(%input, %indices) {
     dim = 0 : i64,
     batch_dims = 0 : i64
   } : (tensor<5x1x5xi32>, tensor<2xi32>) -> tensor<2x1x5xi32>
@@ -20,7 +20,7 @@
     [[ 5,  6],[ 7,  8]],
     [[ 9, 10],[11, 12]]]> : tensor<3x2x2xi32>
   %indices = iree.unfoldable_constant dense<[0, 1]> : tensor<2xi32>
-  %res = "xla_hlo.torch_index_select"(%input, %indices) {
+  %res = "mhlo.torch_index_select"(%input, %indices) {
     dim = 1 : i64,
     batch_dims = 0 : i64
   } : (tensor<3x2x2xi32>, tensor<2xi32>) -> tensor<3x2x2xi32>
@@ -36,7 +36,7 @@
     [[16, 17, 18, 19, 20]],
     [[21, 22, 23, 24, 25]]]> : tensor<5x1x5xi32>
   %indices = iree.unfoldable_constant dense<0> : tensor<i32>
-  %res = "xla_hlo.torch_index_select"(%input, %indices) {
+  %res = "mhlo.torch_index_select"(%input, %indices) {
     dim = 0 : i64,
     batch_dims = 0 : i64
   } : (tensor<5x1x5xi32>, tensor<i32>) -> tensor<1x5xi32>

diff --git a/iree/test/e2e/xla_ops/transpose.mlir b/iree/test/e2e/xla_ops/transpose.mlir
index 678b1dc..d5420bf 100644
--- a/iree/test/e2e/xla_ops/transpose.mlir
+++ b/iree/test/e2e/xla_ops/transpose.mlir

@@ -1,7 +1,7 @@
 func @transpose_2d() attributes { iree.module.export } {
   %input = iree.unfoldable_constant dense<[[1, 2, 3],
                                            [4, 5, 6]]> : tensor<2x3xi32>
-  %0 = "xla_hlo.transpose"(%input) {
+  %0 = "mhlo.transpose"(%input) {
     permutation = dense<[1, 0]> : tensor<2xi64>
   } : (tensor<2x3xi32>) -> tensor<3x2xi32>
   check.expect_eq_const(%0, dense<[[1, 4],
@@ -15,7 +15,7 @@
                                             [ 4,  5,  6]],
                                            [[ 7,  8,  9],
                                             [10, 11, 12]]]> : tensor<2x2x3xi32>
-  %0 = "xla_hlo.transpose"(%input) {
+  %0 = "mhlo.transpose"(%input) {
     permutation = dense<[0, 2, 1]> : tensor<3xi64>
   } : (tensor<2x2x3xi32>) -> tensor<2x3x2xi32>
   check.expect_eq_const(%0, dense<[

diff --git a/iree/test/e2e/xla_ops/while.mlir b/iree/test/e2e/xla_ops/while.mlir
index 5bc2e30..bf0e477 100644
--- a/iree/test/e2e/xla_ops/while.mlir
+++ b/iree/test/e2e/xla_ops/while.mlir

@@ -1,14 +1,14 @@
 func @while() attributes { iree.module.export }  {
   %start = iree.unfoldable_constant dense<1> : tensor<i32>
   %bound = iree.unfoldable_constant dense<3> : tensor<i32>
-  %res = "xla_hlo.while"(%start) ( {
+  %res = "mhlo.while"(%start) ( {
   ^bb0(%count: tensor<i32>):
-    %1 = "xla_hlo.compare"(%count, %bound) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
-    "xla_hlo.return"(%1) : (tensor<i1>) -> ()
+    %1 = "mhlo.compare"(%count, %bound) {comparison_direction = "LT"} : (tensor<i32>, tensor<i32>) -> tensor<i1>
+    "mhlo.return"(%1) : (tensor<i1>) -> ()
   },  {
   ^bb0(%count: tensor<i32>):
-    %1 = xla_hlo.add %count, %count : tensor<i32>
-    "xla_hlo.return"(%1) : (tensor<i32>) -> ()
+    %1 = mhlo.add %count, %count : tensor<i32>
+    "mhlo.return"(%1) : (tensor<i32>) -> ()
   }) : (tensor<i32>) -> tensor<i32>
   check.expect_eq_const(%res, dense<4> : tensor<i32>) : tensor<i32>
   return

diff --git a/iree/tools/init_xla_dialects.h b/iree/tools/init_xla_dialects.h
index 2b7f4be..dcde258 100644
--- a/iree/tools/init_xla_dialects.h
+++ b/iree/tools/init_xla_dialects.h

@@ -29,7 +29,7 @@
 // all the possible dialects to be made available to the context automatically.
 inline void registerXLADialects() {
   static bool init_once = []() {
-    registerDialect<mlir::xla_hlo::XlaHloDialect>();
+    registerDialect<mlir::mhlo::XlaHloDialect>();
     registerDialect<mlir::xla_chlo::XlaHloClientDialect>();
     registerDialect<mlir::xla_lhlo::XlaLhloDialect>();
     return true;

diff --git a/iree/tools/test/simple.mlir b/iree/tools/test/simple.mlir
index 254ec43..d8624d2 100644
--- a/iree/tools/test/simple.mlir
+++ b/iree/tools/test/simple.mlir

@@ -15,7 +15,7 @@
 
 // CHECK-LABEL: EXEC @abs
 func @abs(%input : tensor<i32>) -> (tensor<i32>) attributes { iree.module.export } {
-  %result = "xla_hlo.abs"(%input) : (tensor<i32>) -> tensor<i32>
+  %result = "mhlo.abs"(%input) : (tensor<i32>) -> tensor<i32>
   return %result : tensor<i32>
 }
 // CHECK: i32=2