Renaming util.do_not_optimize to util.optimization_barrier. (#10884)

The previous name made it sound as if the op prevented optimization of
an SSA value by disabling compiler optimizations on the value's
producer. In reality it only introduces a barrier that prevents
optimization across the value.
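
The op's custom assembly syntax changes to the standard form as well;
for example (mirroring the test updates below):

  %1 = util.do_not_optimize(%0) : i32      // old
  %1 = util.optimization_barrier %0 : i32  // new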

Fixes #7091.
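
For downstream C++ code the op class is renamed accordingly; a minimal
sketch of the pattern applied at the builder call sites below, where
builder, loc, and value stand in for the local names at each site:

  // Before:
  builder.create<IREE::Util::DoNotOptimizeOp>(loc, value);
  // After:
  builder.create<IREE::Util::OptimizationBarrierOp>(loc, value);
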
diff --git a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
index abe9b22..200d384 100644
--- a/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
+++ b/compiler/src/iree/compiler/Bindings/Native/Transforms/test/wrap_entry_points_coarse_fences.mlir
@@ -48,7 +48,7 @@
 // CHECK-LABEL: func.func private @_primitiveArgOnly(
 func.func @primitiveArgOnly(%arg0: i32) {
   %0 = arith.addi %arg0, %arg0 : i32
-  util.do_not_optimize(%0) : i32
+  util.optimization_barrier %0 : i32
   return
 }
 
@@ -64,7 +64,7 @@
 // CHECK-LABEL: func.func private @_tensorArgOnly(
 func.func @tensorArgOnly(%arg0: tensor<4xf32>) {
   %0 = arith.addf %arg0, %arg0 : tensor<4xf32>
-  util.do_not_optimize(%0) : tensor<4xf32>
+  util.optimization_barrier %0 : tensor<4xf32>
   return
 }
 
@@ -79,7 +79,7 @@
 // CHECK-LABEL: func.func private @_primitiveResultOnly(
 func.func @primitiveResultOnly() -> i32 {
   %0 = arith.constant 8 : i32
-  %1 = util.do_not_optimize(%0) : i32
+  %1 = util.optimization_barrier %0 : i32
   return %1 : i32
 }
 
@@ -95,7 +95,7 @@
 // CHECK-LABEL: func.func private @_tensorResultOnly(
 func.func @tensorResultOnly() -> tensor<4xf32> {
   %0 = arith.constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
-  %1 = util.do_not_optimize(%0) : tensor<4xf32>
+  %1 = util.optimization_barrier %0 : tensor<4xf32>
   return %1 : tensor<4xf32>
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index 8920b2a..93cdf24 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -512,7 +512,8 @@
                               op.getLoc(), staticType.getDimSize(i))
                           .getResult();
       dynamicDims.push_back(
-          rewriter.create<IREE::Util::DoNotOptimizeOp>(op.getLoc(), dimValue)
+          rewriter
+              .create<IREE::Util::OptimizationBarrierOp>(op.getLoc(), dimValue)
               .getResult(0));
     }
     rewriter.replaceOpWithNewOp<IREE::Flow::TensorReshapeOp>(
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
index b6833f3..c380af0 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
@@ -23,8 +23,8 @@
   // CHECK-DAG: %[[CST:.+]] = arith.constant dense<2> : tensor<2x4xi32>
   // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
   // CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
-  // CHECK-DAG: %[[D0:.+]] = util.do_not_optimize(%[[C2]]) : index
-  // CHECK-DAG: %[[D1:.+]] = util.do_not_optimize(%[[C4]]) : index
+  // CHECK-DAG: %[[D0:.+]] = util.optimization_barrier %[[C2]] : index
+  // CHECK-DAG: %[[D1:.+]] = util.optimization_barrier %[[C4]] : index
   // CHECK: %[[T:.+]] = flow.tensor.reshape %[[CST]] : tensor<2x4xi32> -> tensor<?x?xi32>{%[[D0]], %[[D1]]}
   %0 = flow.tensor.constant dense<2> : tensor<2x4xi32> -> tensor<?x?xi32>
   %d0 = tensor.dim %0, %c0 : tensor<?x?xi32>
@@ -150,8 +150,8 @@
 func.func @reshapeToStaticEmpty(%arg0: tensor<4x?xf32>, %dim0: index) {
   // CHECK-NEXT: %[[RET:.+]] = flow.tensor.empty : tensor<4x0xf32>
   %0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<4x0xf32>
-  // CHECK-NEXT: util.do_not_optimize(%[[RET]])
-  util.do_not_optimize(%0) : tensor<4x0xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[RET]]
+  util.optimization_barrier %0 : tensor<4x0xf32>
   return
 }
 
@@ -162,8 +162,8 @@
 func.func @reshapeToDynamicEmpty(%arg0: tensor<4x?xf32>, %dim0: index, %dim1: index) {
   // CHECK: %[[RET:.+]] = flow.tensor.empty : tensor<0x?xf32>{%[[DIM1]]}
   %0 = flow.tensor.reshape %arg0 : tensor<4x?xf32>{%dim0} -> tensor<0x?xf32>{%dim1}
-  // CHECK-NEXT: util.do_not_optimize(%[[RET]])
-  util.do_not_optimize(%0) : tensor<0x?xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[RET]]
+  util.optimization_barrier %0 : tensor<0x?xf32>
   return
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
index 3cb929e..8fb53a6 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/ExportBenchmarkFuncs.cpp
@@ -79,12 +79,12 @@
   // hal.tensor.export
   auto bufferExportOp = initializerBuilder.create<IREE::HAL::TensorExportOp>(
       loc, globalOp.getType(), splatOp.getResult());
-  // util.do_not_optimize (try to prevent optimizations across the export)
-  auto dnoOp = initializerBuilder.create<IREE::Util::DoNotOptimizeOp>(
+  // util.optimization_barrier (try to prevent optimizations across the export)
+  auto barrierOp = initializerBuilder.create<IREE::Util::OptimizationBarrierOp>(
       loc, bufferExportOp.getTarget());
   // util.global.store
-  initializerBuilder.create<IREE::Util::GlobalStoreOp>(loc, dnoOp.getResult(0),
-                                                       globalOp.getName());
+  initializerBuilder.create<IREE::Util::GlobalStoreOp>(
+      loc, barrierOp.getResult(0), globalOp.getName());
   initializerBuilder.create<IREE::Util::InitializerReturnOp>(loc);
 
   return globalOp;
@@ -233,10 +233,10 @@
   }
   auto callOp = blockBuilder.create<mlir::func::CallOp>(loc, entryFuncOp, args);
 
-  // Sink all results with do_not_optimize to ensure that DCE does not
-  // remove the call.
+  // Sink all results with a barrier to ensure that DCE does not remove the
+  // call.
   for (auto result : callOp.getResults()) {
-    blockBuilder.create<IREE::Util::DoNotOptimizeOp>(loc, result);
+    blockBuilder.create<IREE::Util::OptimizationBarrierOp>(loc, result);
   }
   blockBuilder.create<mlir::func::ReturnOp>(loc);
 
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
index 90949d7..82ade23 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/cleanup_tensor_shapes.mlir
@@ -7,8 +7,8 @@
 func.func @stripTieShape(%arg0: tensor<?xi32>, %arg1: index) {
   // CHECK-NOT: flow.tensor.tie_shape
   %0 = flow.tensor.tie_shape %arg0 : tensor<?xi32>{%arg1}
-  // CHECK: util.do_not_optimize(%[[ARG0]])
-  %1 = util.do_not_optimize(%0) : tensor<?xi32>
+  // CHECK: util.optimization_barrier %[[ARG0]]
+  %1 = util.optimization_barrier %0 : tensor<?xi32>
   return
 }
 
@@ -23,6 +23,6 @@
   %c0 = arith.constant 0 : index
   // expected-error @+1 {{'tensor.dim' op unexpected during shape cleanup}}
   %0 = tensor.dim %arg0, %c0 : tensor<?xi32>
-  %1 = util.do_not_optimize(%0) : index
+  %1 = util.optimization_barrier %0 : index
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/expand_tensor_shapes.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/expand_tensor_shapes.mlir
index 81d5f48..171424a 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/expand_tensor_shapes.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/expand_tensor_shapes.mlir
@@ -14,8 +14,8 @@
   // CHECK-NEXT: %[[D2:.+]] = util.global.load @loadedGlobal__d2 : index
   // CHECK-NEXT: %[[TIED:.+]] = flow.tensor.tie_shape %[[TENSOR]] : tensor<4x?x?x2xf32>{%[[D1]], %[[D2]]}
   %0 = util.global.load @loadedGlobal : tensor<4x?x?x2xf32>
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED]])
-  util.do_not_optimize(%0) : tensor<4x?x?x2xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED]]
+  util.optimization_barrier %0 : tensor<4x?x?x2xf32>
   return
 }
 
@@ -50,10 +50,10 @@
   // CHECK-NEXT: %[[TIED_ARG0:.+]] = flow.tensor.tie_shape %[[ARG0]] : tensor<4x?x?x2xf32>{%[[ARG0_D1]], %[[ARG0_D2]]}
   // CHECK-NEXT: %[[TIED_ARG1:.+]] = flow.tensor.tie_shape %[[ARG1]] : tensor<?xi32>{%[[ARG1_D0]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_ARG0]])
-  util.do_not_optimize(%arg0) : tensor<4x?x?x2xf32>
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_ARG1]])
-  util.do_not_optimize(%arg1) : tensor<?xi32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_ARG0]]
+  util.optimization_barrier %arg0 : tensor<4x?x?x2xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_ARG1]]
+  util.optimization_barrier %arg1 : tensor<?xi32>
 
   return
 }
@@ -93,10 +93,10 @@
   // CHECK-NEXT: %[[TIED_RET0:.+]] = flow.tensor.tie_shape %[[RET]]#0 : tensor<4x?x?x2xf32>{%[[RET]]#1, %[[RET]]#2}
   // CHECK-NEXT: %[[TIED_RET1:.+]] = flow.tensor.tie_shape %[[RET]]#3 : tensor<?xi32>{%[[RET]]#4}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_RET0]])
-  util.do_not_optimize(%0#0) : tensor<4x?x?x2xf32>
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_RET1]])
-  util.do_not_optimize(%0#1) : tensor<?xi32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_RET0]]
+  util.optimization_barrier %0#0 : tensor<4x?x?x2xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_RET1]]
+  util.optimization_barrier %0#1 : tensor<?xi32>
 
   return
 }
@@ -123,10 +123,10 @@
   // CHECK-NEXT: %[[TIED_BB1_ARG0:.+]] = flow.tensor.tie_shape %[[BB1_ARG0]] : tensor<4x?x?x2xf32>{%[[BB1_ARG0_D1]], %[[BB1_ARG0_D2]]}
   // CHECK-NEXT: %[[TIED_BB1_ARG1:.+]] = flow.tensor.tie_shape %[[BB1_ARG1]] : tensor<?xi32>{%[[BB1_ARG1_D0]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_BB1_ARG0]])
-  util.do_not_optimize(%bb1_arg0) : tensor<4x?x?x2xf32>
-  // CHECK-NEXT: util.do_not_optimize(%[[TIED_BB1_ARG1]])
-  util.do_not_optimize(%bb1_arg1) : tensor<?xi32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_BB1_ARG0]]
+  util.optimization_barrier %bb1_arg0 : tensor<4x?x?x2xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[TIED_BB1_ARG1]]
+  util.optimization_barrier %bb1_arg1 : tensor<?xi32>
 
   return
 }
@@ -148,8 +148,8 @@
   // CHECK-NEXT: %[[SEL_TIED:.+]] = flow.tensor.tie_shape %[[SEL_TENSOR]] : tensor<4x?x?x2xf32>{%[[SEL_D1]], %[[SEL_D2]]}
   %0 = arith.select %cond, %arg0, %arg1 : tensor<4x?x?x2xf32>
 
-  // CHECK-NEXT: util.do_not_optimize(%[[SEL_TIED]])
-  util.do_not_optimize(%0) : tensor<4x?x?x2xf32>
+  // CHECK-NEXT: util.optimization_barrier %[[SEL_TIED]]
+  util.optimization_barrier %0 : tensor<4x?x?x2xf32>
 
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
index 645a584..9d4985f 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/export_benchmark_funcs.mlir
@@ -18,7 +18,7 @@
 //  CHECK-DAG:   %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
 //  CHECK-DAG:   %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : !hal.buffer_view
 // CHECK-NEXT:   %[[RET0:.+]] = call @simpleMul(%[[ARG0]], %[[ARG1]])
-//      CHECK:   util.do_not_optimize(%[[RET0]]) : !hal.buffer_view
+//      CHECK:   util.optimization_barrier %[[RET0]] : !hal.buffer_view
 //      CHECK:   return
 
 // -----
@@ -44,7 +44,7 @@
 // CHECK-DAG:   %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : i32
 // CHECK-DAG:   %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : i32
 //     CHECK:   %[[RET0:.+]] = call @while(%[[ARG0]], %[[ARG1]])
-//     CHECK:   util.do_not_optimize(%[[RET0]]) : i32
+//     CHECK:   util.optimization_barrier %[[RET0]] : i32
 //     CHECK:   return
 
 // -----
@@ -63,13 +63,13 @@
 //      CHECK: util.initializer {
 //  CHECK-DAG:   %[[SPLAT:.+]] = flow.tensor.splat %c0_i32
 //  CHECK-DAG:   %[[EXPORT:.+]] = hal.tensor.export %[[SPLAT]] : tensor<4xi32> -> !hal.buffer_view
-//  CHECK-DAG:   %[[DNO:.+]] = util.do_not_optimize(%[[EXPORT]])
+//  CHECK-DAG:   %[[DNO:.+]] = util.optimization_barrier %[[EXPORT]]
 // CHECK-NEXT:   util.global.store %[[DNO]], @[[GLOBAL_ARG0]]
 
 //      CHECK: func.func @importBufferViewBitcasting_benchmark()
 //  CHECK-DAG:   %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
 // CHECK-NEXT:   %[[RET0:.+]] = call @importBufferViewBitcasting(%[[ARG0]])
-//      CHECK:   util.do_not_optimize(%[[RET0]]) : !hal.buffer_view
+//      CHECK:   util.optimization_barrier %[[RET0]] : !hal.buffer_view
 //      CHECK:   return
 
 // -----
@@ -103,19 +103,19 @@
 //      CHECK: util.initializer {
 //  CHECK-DAG:   %[[SPLAT0:.+]] = flow.tensor.splat %c0_i32
 //  CHECK-DAG:   %[[EXPORT0:.+]] = hal.tensor.export %[[SPLAT0]] : tensor<4xi32> -> !hal.buffer_view
-//  CHECK-DAG:   %[[DNO0:.+]] = util.do_not_optimize(%[[EXPORT0]])
+//  CHECK-DAG:   %[[DNO0:.+]] = util.optimization_barrier %[[EXPORT0]]
 // CHECK-NEXT:   util.global.store %[[DNO0]], @[[GLOBAL_ARG0]]
 
 //      CHECK: util.global private @[[GLOBAL_ARG1:.+]] {noinline} : !hal.buffer
 //      CHECK: util.initializer {
 //  CHECK-DAG:   %[[SPLAT1:.+]] = flow.tensor.splat %c0_i32
 //  CHECK-DAG:   %[[EXPORT1:.+]] = hal.tensor.export %[[SPLAT1]] : tensor<4xi32> -> !hal.buffer
-//  CHECK-DAG:   %[[DNO1:.+]] = util.do_not_optimize(%[[EXPORT1]])
+//  CHECK-DAG:   %[[DNO1:.+]] = util.optimization_barrier %[[EXPORT1]]
 // CHECK-NEXT:   util.global.store %[[DNO1]], @[[GLOBAL_ARG1]]
 
 //      CHECK: func.func @exportBufferViewInPlace_benchmark()
 //  CHECK-DAG:   %[[ARG0:.+]] = util.global.load @[[GLOBAL_ARG0]] : !hal.buffer_view
 //  CHECK-DAG:   %[[ARG1:.+]] = util.global.load @[[GLOBAL_ARG1]] : !hal.buffer
 // CHECK-NEXT:   %[[RET0:.+]] = call @exportBufferViewInPlace(%[[ARG0]], %[[ARG1]])
-//      CHECK:   util.do_not_optimize(%[[RET0]]) : !hal.buffer_view
+//      CHECK:   util.optimization_barrier %[[RET0]] : !hal.buffer_view
 //      CHECK:   return
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
index 8ccebcc..811712e 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/ResourceUsage.cpp
@@ -250,7 +250,7 @@
           getState() ^= trueUsage.getState();
           getState() ^= falseUsage.getState();
         })
-        .Case([&](IREE::Util::DoNotOptimizeOp op) {
+        .Case([&](IREE::Util::OptimizationBarrierOp op) {
           auto sourceUsage = solver.getElementFor<ValueResourceUsage>(
               *this, Position::forValue(op.getOperand(0)),
               DFX::Resolution::REQUIRED);
@@ -455,7 +455,7 @@
                 return WalkResult::advance();
               });
         })
-        .Case([&](IREE::Util::DoNotOptimizeOp op) {
+        .Case([&](IREE::Util::OptimizationBarrierOp op) {
           auto resultUsage = solver.getElementFor<ValueResourceUsage>(
               *this, Position::forValue(op.getResult(0)),
               DFX::Resolution::REQUIRED);
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
index 93376a2..06cbb6a 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/test/executable_ops.mlir
@@ -28,7 +28,7 @@
       // CHECK: %[[SUBSPAN:.+]] = stream.binding.subspan %[[INPUT]][%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:i64>
       // CHECK: = flow.dispatch.tensor.load %[[SUBSPAN]]
       %tied_input = flow.dispatch.tensor.load %input, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:i64> -> tensor<i64>
-      util.do_not_optimize(%tied_input) : tensor<i64>
+      util.optimization_barrier %tied_input : tensor<i64>
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
index b4719ef..1907b4b 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/RefineUsage.cpp
@@ -336,7 +336,7 @@
                                           RewritePatternSet &patterns) {
   // NOTE: only ops that return values or contain regions need to be handled.
   patterns.insert<ApplyInitializerOp, ApplyFuncOp>(context, analysis);
-  patterns.insert<ApplyGenericOp<IREE::Util::DoNotOptimizeOp>,
+  patterns.insert<ApplyGenericOp<IREE::Util::OptimizationBarrierOp>,
                   ApplyGenericOp<mlir::arith::SelectOp>,
                   ApplyGenericOp<mlir::func::CallOp>,
                   ApplyGenericOp<IREE::Stream::TimepointBarrierOp>>(context,
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
index b0be2ab..8e506ed 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
@@ -125,8 +125,8 @@
   // CHECK: %[[CONSTANT_SIZE:.+]] = stream.resource.size %[[CONSTANT]] : !stream.resource<constant>
   // CHECK: %[[INITIAL:.+]] = stream.async.transfer %[[CONSTANT]] : !stream.resource<constant>{%[[CONSTANT_SIZE]]} -> !stream.resource<*>{%[[CONSTANT_SIZE]]}
   %cst = arith.constant dense<4> : tensor<i32>
-  // CHECK: %[[INITIAL_DNO:.+]] = util.do_not_optimize(%[[INITIAL]]) : !stream.resource<*>
-  %0 = util.do_not_optimize(%cst) : tensor<i32>
+  // CHECK: %[[INITIAL_DNO:.+]] = util.optimization_barrier %[[INITIAL]] : !stream.resource<*>
+  %0 = util.optimization_barrier %cst : tensor<i32>
 
   // CHECK: %[[VAR_SIZE:.+]] = stream.resource.size %[[INITIAL_DNO]] : !stream.resource<*>
   // CHECK: cf.br ^bb1(%[[INITIAL_DNO]], %[[VAR_SIZE]] : !stream.resource<*>, index)
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
index 89150a5..dbf5d28 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/encode_device_tensors.mlir
@@ -11,8 +11,8 @@
       // CHECK: %[[TILE_I8:.+]] = flow.dispatch.tensor.load %[[BINDING]], {{.+}} : !flow.dispatch.tensor<readonly:4xi8> -> tensor<?xi8>
       // CHECK: %[[TILE_I1:.+]] = arith.trunci %[[TILE_I8]] : tensor<?xi8> to tensor<?xi1>
       %tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xi1> -> tensor<?xi1>
-      // CHECK: do_not_optimize(%[[TILE_I1]])
-      util.do_not_optimize(%tile) : tensor<?xi1>
+      // CHECK: util.optimization_barrier %[[TILE_I1]]
+      util.optimization_barrier %tile : tensor<?xi1>
       return
     }
   }
@@ -78,8 +78,8 @@
       // CHECK: %[[TILE_I8:.+]] = flow.dispatch.tensor.load %[[BINDING]], {{.+}} : !flow.dispatch.tensor<readonly:4xi64> -> tensor<?xi64>
       // CHECK: %[[TILE_I1:.+]] = arith.trunci %[[TILE_I8]] : tensor<?xi64> to tensor<?xi33>
       %tile = flow.dispatch.tensor.load %binding, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xi33> -> tensor<?xi33>
-      // CHECK: do_not_optimize(%[[TILE_I1]])
-      util.do_not_optimize(%tile) : tensor<?xi33>
+      // CHECK: util.optimization_barrier %[[TILE_I1]]
+      util.optimization_barrier %tile : tensor<?xi33>
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
index 3f3dcf1..985f28e 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fold_uniform_operands.mlir
@@ -13,18 +13,18 @@
   builtin.module  {
     // CHECK: func.func @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A01:.+]]: i32, %[[B0:.+]]: index, %[[C:.+]]: i1, %[[B1:.+]]: index)
     func.func @dispatch(%binding: !stream.binding, %a0: i32, %b0: index, %c: i1, %a1: i32, %b1: index) {
-      // CHECK-NEXT: util.do_not_optimize(%[[BINDING]]) : !stream.binding
-      util.do_not_optimize(%binding) : !stream.binding
-      // CHECK-NEXT: util.do_not_optimize(%[[A01]]) : i32
-      util.do_not_optimize(%a0) : i32
-      // CHECK-NEXT: util.do_not_optimize(%[[A01]]) : i32
-      util.do_not_optimize(%a1) : i32
-      // CHECK-NEXT: util.do_not_optimize(%[[B0]]) : index
-      util.do_not_optimize(%b0) : index
-      // CHECK-NEXT: util.do_not_optimize(%[[B1]]) : index
-      util.do_not_optimize(%b1) : index
-      // CHECK-NEXT: util.do_not_optimize(%[[C]]) : i1
-      util.do_not_optimize(%c) : i1
+      // CHECK-NEXT: util.optimization_barrier %[[BINDING]] : !stream.binding
+      util.optimization_barrier %binding : !stream.binding
+      // CHECK-NEXT: util.optimization_barrier %[[A01]] : i32
+      util.optimization_barrier %a0 : i32
+      // CHECK-NEXT: util.optimization_barrier %[[A01]] : i32
+      util.optimization_barrier %a1 : i32
+      // CHECK-NEXT: util.optimization_barrier %[[B0]] : index
+      util.optimization_barrier %b0 : index
+      // CHECK-NEXT: util.optimization_barrier %[[B1]] : index
+      util.optimization_barrier %b1 : index
+      // CHECK-NEXT: util.optimization_barrier %[[C]] : i1
+      util.optimization_barrier %c : i1
       return
     }
   }
@@ -63,14 +63,14 @@
     // CHECK: func.func @dispatch(%[[BINDING:.+]]: !stream.binding, %[[A:.+]]: i32, %[[C:.+]]: i1)
     func.func @dispatch(%binding: !stream.binding, %a: i32, %b: index, %c: i1) {
       // CHECK: %[[B:.+]] = arith.constant 20 : index
-      // CHECK-NEXT: util.do_not_optimize(%[[BINDING]]) : !stream.binding
-      util.do_not_optimize(%binding) : !stream.binding
-      // CHECK-NEXT: util.do_not_optimize(%[[A]]) : i32
-      util.do_not_optimize(%a) : i32
-      // CHECK-NEXT: util.do_not_optimize(%[[B]]) : index
-      util.do_not_optimize(%b) : index
-      // CHECK-NEXT: util.do_not_optimize(%[[C]]) : i1
-      util.do_not_optimize(%c) : i1
+      // CHECK-NEXT: util.optimization_barrier %[[BINDING]] : !stream.binding
+      util.optimization_barrier %binding : !stream.binding
+      // CHECK-NEXT: util.optimization_barrier %[[A]] : i32
+      util.optimization_barrier %a : i32
+      // CHECK-NEXT: util.optimization_barrier %[[B]] : index
+      util.optimization_barrier %b : index
+      // CHECK-NEXT: util.optimization_barrier %[[C]] : i1
+      util.optimization_barrier %c : i1
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
index 95295d3..0f999cc 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings.mlir
@@ -21,17 +21,17 @@
       // CHECK: %[[SUM_OFFSET_A:.+]] = arith.addi %c0, %[[OFFSET_A]]
       // CHECK: %[[SUBSPAN_A:.+]] = stream.binding.subspan %[[BINDING_A]][%[[SUM_OFFSET_A]]]
       %subspan_a = stream.binding.subspan %binding_a[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_A]])
-      util.do_not_optimize(%subspan_a) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_A]]
+      util.optimization_barrier %subspan_a : !flow.dispatch.tensor<readwrite:20xi8>
 
       // CHECK: %[[SUM_OFFSET_B:.+]] = arith.addi %c20, %[[OFFSET_B]]
       // CHECK-NEXT: %[[SUBSPAN_B:.+]] = stream.binding.subspan %[[BINDING_B]][%[[SUM_OFFSET_B]]]
       %subspan_b = stream.binding.subspan %binding_b[%c20] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_B]])
-      util.do_not_optimize(%subspan_b) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_B]]
+      util.optimization_barrier %subspan_b : !flow.dispatch.tensor<readwrite:20xi8>
 
-      // CHECK-NEXT: util.do_not_optimize(%[[OPERAND]]) : index
-      util.do_not_optimize(%operand) : index
+      // CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
+      util.optimization_barrier %operand : index
       return
     }
   }
@@ -100,23 +100,23 @@
       // CHECK: %[[SUM_OFFSET_A:.+]] = arith.addi %c0, %[[OFFSET_A]]
       // CHECK: %[[SUBSPAN_A:.+]] = stream.binding.subspan %[[BINDING_A]][%[[SUM_OFFSET_A]]]
       %subspan_a = stream.binding.subspan %binding_a[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_A]])
-      util.do_not_optimize(%subspan_a) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_A]]
+      util.optimization_barrier %subspan_a : !flow.dispatch.tensor<readwrite:20xi8>
 
       // CHECK: %[[SUM_OFFSET_B:.+]] = arith.addi %c20, %[[OFFSET_B]]
       // CHECK-NEXT: %[[SUBSPAN_B:.+]] = stream.binding.subspan %[[BINDING_B]][%[[SUM_OFFSET_B]]]
       %subspan_b = stream.binding.subspan %binding_b[%c20] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_B]])
-      util.do_not_optimize(%subspan_b) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_B]]
+      util.optimization_barrier %subspan_b : !flow.dispatch.tensor<readwrite:20xi8>
 
       // CHECK: %[[SUM_OFFSET_C:.+]] = arith.addi %c40, %[[OFFSET_C]]
       // CHECK-NEXT: %[[SUBSPAN_C:.+]] = stream.binding.subspan %[[BINDING_A]][%[[SUM_OFFSET_C]]]
       %subspan_c = stream.binding.subspan %binding_c[%c40] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_C]])
-      util.do_not_optimize(%subspan_c) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_C]]
+      util.optimization_barrier %subspan_c : !flow.dispatch.tensor<readwrite:20xi8>
 
-      // CHECK-NEXT: util.do_not_optimize(%[[OPERAND]]) : index
-      util.do_not_optimize(%operand) : index
+      // CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
+      util.optimization_barrier %operand : index
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
index d8d0e9f..2f99462 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/fuse_dispatch_bindings_noalias.mlir
@@ -19,23 +19,23 @@
       // CHECK: %[[SUM_OFFSET_A:.+]] = arith.addi %c0, %[[OFFSET_A]]
       // CHECK: %[[SUBSPAN_A:.+]] = stream.binding.subspan %[[BINDING_A]][%[[SUM_OFFSET_A]]]
       %subspan_a = stream.binding.subspan %binding_a[%c0] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_A]])
-      util.do_not_optimize(%subspan_a) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_A]]
+      util.optimization_barrier %subspan_a : !flow.dispatch.tensor<readwrite:20xi8>
 
       // CHECK: %[[SUM_OFFSET_B:.+]] = arith.addi %c20, %[[OFFSET_B]]
       // CHECK-NEXT: %[[SUBSPAN_B:.+]] = stream.binding.subspan %[[BINDING_A]][%[[SUM_OFFSET_B]]]
       %subspan_b = stream.binding.subspan %binding_b[%c20] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_B]])
-      util.do_not_optimize(%subspan_b) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_B]]
+      util.optimization_barrier %subspan_b : !flow.dispatch.tensor<readwrite:20xi8>
 
       // CHECK: %[[SUM_OFFSET_C:.+]] = arith.addi %c40, %[[OFFSET_C]]
       // CHECK-NEXT: %[[SUBSPAN_C:.+]] = stream.binding.subspan %[[BINDING_C]][%[[SUM_OFFSET_C]]]
       %subspan_c = stream.binding.subspan %binding_c[%c40] : !stream.binding -> !flow.dispatch.tensor<readwrite:20xi8>{%c20}
-      // CHECK-NEXT: util.do_not_optimize(%[[SUBSPAN_C]])
-      util.do_not_optimize(%subspan_c) : !flow.dispatch.tensor<readwrite:20xi8>
+      // CHECK-NEXT: util.optimization_barrier %[[SUBSPAN_C]]
+      util.optimization_barrier %subspan_c : !flow.dispatch.tensor<readwrite:20xi8>
 
-      // CHECK-NEXT: util.do_not_optimize(%[[OPERAND]]) : index
-      util.do_not_optimize(%operand) : index
+      // CHECK-NEXT: util.optimization_barrier %[[OPERAND]] : index
+      util.optimization_barrier %operand : index
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_allocations.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_allocations.mlir
index da1c992..9ad0475 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_allocations.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_allocations.mlir
@@ -17,10 +17,10 @@
   // CHECK: %[[SLICE_B:.+]] = stream.resource.subview %[[ALLOC]][%[[SLICES]]#2]
   // CHECK-SAME: !stream.resource<transient>{%[[SLICES]]#0} -> !stream.resource<transient>{%[[SIZE_B]]}
 
-  // CHECK: util.do_not_optimize(%[[SLICE_A]])
-  util.do_not_optimize(%0#0) : !stream.resource<transient>
-  // CHECK: util.do_not_optimize(%[[SLICE_B]])
-  util.do_not_optimize(%0#1) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[SLICE_A]]
+  util.optimization_barrier %0#0 : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[SLICE_B]]
+  util.optimization_barrier %0#1 : !stream.resource<transient>
   return
 }
 
@@ -32,7 +32,7 @@
   %c0 = arith.constant 0 : index
   %0 = stream.resource.alloc : !stream.resource<transient>{%c0}
 
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%0) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %0 : !stream.resource<transient>
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
index 52f1977..6597ae9 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/pack_dispatch_operands.mlir
@@ -7,8 +7,8 @@
     // CHECK-SAME: (%arg0: i32, %arg1: !stream.binding)
     func.func @device_i1(%arg0: i1 {stream.values = [true, false]}, %arg1: !stream.binding) {
       // CHECK-NEXT: %[[DEV_I1:.+]] = arith.trunci %arg0 {stream.values = [true, false]} : i32 to i1
-      // CHECK-NEXT: util.do_not_optimize(%[[DEV_I1]])
-      util.do_not_optimize(%arg0) : i1
+      // CHECK-NEXT: util.optimization_barrier %[[DEV_I1]]
+      util.optimization_barrier %arg0 : i1
       return
     }
   }
@@ -38,8 +38,8 @@
     func.func @device_bf16(%arg0: bf16, %arg1: !stream.binding) {
       // CHECK-NEXT: %[[DEV_I16:.+]] = arith.trunci %arg0 : i32 to i16
       // CHECK-NEXT: %[[DEV_BF16:.+]] = arith.bitcast %[[DEV_I16]] : i16 to bf16
-      // CHECK-NEXT: util.do_not_optimize(%[[DEV_BF16]])
-      util.do_not_optimize(%arg0) : bf16
+      // CHECK-NEXT: util.optimization_barrier %[[DEV_BF16]]
+      util.optimization_barrier %arg0 : bf16
       return
     }
   }
@@ -73,8 +73,8 @@
       // CHECK-DAG: %[[DEV_HI64:.+]] = arith.extui %[[DEV_HI32]] : i32 to i64
       // CHECK-DAG: %[[DEV_HISHL:.+]] = arith.shli %[[DEV_HI64]], %c32
       // CHECK-DAG: %[[DEV_I64:.+]] = arith.ori %[[DEV_LO64]], %[[DEV_HISHL]] {stream.values = [-1, 8589934595]}
-      // CHECK-NEXT: util.do_not_optimize(%[[DEV_I64]])
-      util.do_not_optimize(%arg0) : i64
+      // CHECK-NEXT: util.optimization_barrier %[[DEV_I64]]
+      util.optimization_barrier %arg0 : i64
       return
     }
   }
@@ -117,8 +117,8 @@
       // CHECK-SAME:   stream.alignment = 16 : index
       // CHECK-SAME:   stream.values = [0 : index, 1234 : index]
       // CHECK-SAME: } : i32 to index
-      // CHECK: util.do_not_optimize(%[[DEV_INDEX]])
-      util.do_not_optimize(%arg0) : index
+      // CHECK: util.optimization_barrier %[[DEV_INDEX]]
+      util.optimization_barrier %arg0 : index
       return
     }
   }
@@ -166,8 +166,8 @@
       // CHECK-SAME:   stream.alignment = 16 : index
       // CHECK-SAME:   stream.values = [0 : index, 1234 : index]
       // CHECK-SAME: } : i64 to index
-      // CHECK: util.do_not_optimize(%[[DEV_INDEX]])
-      util.do_not_optimize(%arg0) : index
+      // CHECK: util.optimization_barrier %[[DEV_INDEX]]
+      util.optimization_barrier %arg0 : index
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
index f192a67..a326a1d 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_subviews.mlir
@@ -20,8 +20,8 @@
   // CHECK-NEXT: %[[LENGTH:.+]] = util.global.load @constantGlobal__length : index
   // CHECK: %[[SUBVIEW:.+]] = stream.resource.subview %[[RESOURCE]][%[[OFFSET]]] : !stream.resource<constant>{%[[STORAGE_SIZE]]} -> !stream.resource<constant>{%[[LENGTH]]}
   %0 = util.global.load @constantGlobal : !stream.resource<constant>
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBVIEW]])
-  util.do_not_optimize(%0) : !stream.resource<constant>
+  // CHECK-NEXT: util.optimization_barrier %[[SUBVIEW]]
+  util.optimization_barrier %0 : !stream.resource<constant>
   return
 }
 
@@ -61,10 +61,10 @@
   // CHECK-NEXT: %[[SUBVIEW0:.+]] = stream.resource.subview %[[RESOURCE0]][%[[OFFSET0]]] : !stream.resource<external>{%[[STORAGE_SIZE0]]} -> !stream.resource<external>{%[[LENGTH0]]}
   // CHECK-NEXT: %[[SUBVIEW1:.+]] = stream.resource.subview %[[RESOURCE1]][%[[OFFSET1]]] : !stream.resource<transient>{%[[STORAGE_SIZE1]]} -> !stream.resource<transient>{%[[LENGTH1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBVIEW0]])
-  util.do_not_optimize(%resource0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBVIEW1]])
-  util.do_not_optimize(%resource1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[SUBVIEW0]]
+  util.optimization_barrier %resource0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[SUBVIEW1]]
+  util.optimization_barrier %resource1 : !stream.resource<transient>
   return
 }
 
@@ -109,10 +109,10 @@
   // CHECK-NEXT: %[[RET_SUBVIEW0:.+]] = stream.resource.subview %[[RET]]#0[%[[RET]]#2] : !stream.resource<external>{%[[RET]]#1} -> !stream.resource<external>{%[[RET]]#3}
   // CHECK-NEXT: %[[RET_SUBVIEW1:.+]] = stream.resource.subview %[[RET]]#4[%[[RET]]#6] : !stream.resource<transient>{%[[RET]]#5} -> !stream.resource<transient>{%[[RET]]#7}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_SUBVIEW0]]) : !stream.resource<external>
-  util.do_not_optimize(%0#0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_SUBVIEW1]]) : !stream.resource<transient>
-  util.do_not_optimize(%0#1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[RET_SUBVIEW0]] : !stream.resource<external>
+  util.optimization_barrier %0#0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[RET_SUBVIEW1]] : !stream.resource<transient>
+  util.optimization_barrier %0#1 : !stream.resource<transient>
 
   return
 }
@@ -144,10 +144,10 @@
   // CHECK-NEXT: %[[BB1_SUBVIEW0:.+]] = stream.resource.subview %[[BB1_RESOURCE0]][%[[BB1_OFFSET0]]] : !stream.resource<external>{%[[BB1_STORAGE_SIZE0]]} -> !stream.resource<external>{%[[BB1_LENGTH0]]}
   // CHECK-NEXT: %[[BB1_SUBVIEW1:.+]] = stream.resource.subview %[[BB1_RESOURCE1]][%[[BB1_OFFSET1]]] : !stream.resource<transient>{%[[BB1_STORAGE_SIZE1]]} -> !stream.resource<transient>{%[[BB1_LENGTH1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[BB1_SUBVIEW0]])
-  util.do_not_optimize(%bb1_resource0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[BB1_SUBVIEW1]])
-  util.do_not_optimize(%bb1_resource1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[BB1_SUBVIEW0]]
+  util.optimization_barrier %bb1_resource0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[BB1_SUBVIEW1]]
+  util.optimization_barrier %bb1_resource1 : !stream.resource<transient>
 
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
index 8bddbc3..e069c73 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/propagate_timepoints.mlir
@@ -16,8 +16,8 @@
   // CHECK-NEXT: %[[SIZE:.+]] = stream.resource.size %[[UNREADY]]
   // CHECK-NEXT: %[[VALUE:.+]] = stream.timepoint.await %[[TIMEPOINT]] => %[[UNREADY]] : !stream.resource<constant>{%[[SIZE]]}
   %0 = util.global.load @constantGlobal : !stream.resource<constant>
-  // CHECK-NEXT: util.do_not_optimize(%[[VALUE]])
-  util.do_not_optimize(%0) : !stream.resource<constant>
+  // CHECK-NEXT: util.optimization_barrier %[[VALUE]]
+  util.optimization_barrier %0 : !stream.resource<constant>
   return
 }
 
@@ -58,10 +58,10 @@
   // CHECK-NEXT: %[[SIZE1:.+]] = stream.resource.size %[[UNREADY1]] : !stream.resource<transient>
   // CHECK-NEXT: %[[READY1:.+]] = stream.timepoint.await %[[TIMEPOINT1]] => %[[UNREADY1]] : !stream.resource<transient>{%[[SIZE1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[READY0]])
-  util.do_not_optimize(%arg0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[READY1]])
-  util.do_not_optimize(%arg1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[READY0]]
+  util.optimization_barrier %arg0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[READY1]]
+  util.optimization_barrier %arg1 : !stream.resource<transient>
   return
 }
 
@@ -109,10 +109,10 @@
   // CHECK-NEXT: %[[RET_SIZE1:.+]] = stream.resource.size %[[RET]]#3 : !stream.resource<transient>
   // CHECK-NEXT: %[[RET_READY1:.+]] = stream.timepoint.await %[[RET]]#2 => %[[RET]]#3 : !stream.resource<transient>{%[[RET_SIZE1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_READY0]]) : !stream.resource<external>
-  util.do_not_optimize(%0#0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_READY1]]) : !stream.resource<transient>
-  util.do_not_optimize(%0#1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[RET_READY0]] : !stream.resource<external>
+  util.optimization_barrier %0#0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[RET_READY1]] : !stream.resource<transient>
+  util.optimization_barrier %0#1 : !stream.resource<transient>
 
   return
 }
@@ -147,10 +147,10 @@
   // CHECK-NEXT: %[[SIZE1:.+]] = stream.resource.size %[[BB1_UNREADY1]] : !stream.resource<transient>
   // CHECK-NEXT: %[[READY1:.+]] = stream.timepoint.await %[[BB1_TIMEPOINT1]] => %[[BB1_UNREADY1]] : !stream.resource<transient>{%10}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[READY0]])
-  util.do_not_optimize(%bb1_arg0) : !stream.resource<external>
-  // CHECK-NEXT: util.do_not_optimize(%[[READY1]])
-  util.do_not_optimize(%bb1_arg1) : !stream.resource<transient>
+  // CHECK-NEXT: util.optimization_barrier %[[READY0]]
+  util.optimization_barrier %bb1_arg0 : !stream.resource<external>
+  // CHECK-NEXT: util.optimization_barrier %[[READY1]]
+  util.optimization_barrier %bb1_arg1 : !stream.resource<transient>
   return
 }
 
@@ -184,7 +184,7 @@
     stream.yield %arg0_capture, %arg1_capture : !stream.resource<external>{%arg0_size}, !stream.resource<transient>{%arg1_size}
   } => !stream.timepoint
   %ready_results:2 = stream.timepoint.await %results_timepoint => %results#0, %results#1 : !stream.resource<external>{%arg0_size}, !stream.resource<transient>{%arg1_size}
-  util.do_not_optimize(%ready_results#0) : !stream.resource<external>
-  util.do_not_optimize(%ready_results#1) : !stream.resource<transient>
+  util.optimization_barrier %ready_results#0 : !stream.resource<external>
+  util.optimization_barrier %ready_results#1 : !stream.resource<transient>
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
index a997bf8..5487c13 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/schedule_allocation.mlir
@@ -35,15 +35,15 @@
 
   // Join the two async ops (constant upload and execution should overlap).
   // CHECK: %[[JOIN:.+]] = stream.timepoint.join max(%[[CST_TIMEPOINT]], %[[EXEC_TIMEPOINT]])
-  // CHECK: util.do_not_optimize(%[[JOIN]]) : !stream.timepoint
-  util.do_not_optimize(%result_timepoint) : !stream.timepoint
+  // CHECK: util.optimization_barrier %[[JOIN]] : !stream.timepoint
+  util.optimization_barrier %result_timepoint : !stream.timepoint
 
-  // CHECK: util.do_not_optimize(%[[CST_RETS]]#0)
-  util.do_not_optimize(%results#0) : !stream.resource<constant>
-  // CHECK: util.do_not_optimize(%[[CST_RETS]]#1)
-  util.do_not_optimize(%results#1) : !stream.resource<constant>
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%results#2) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[CST_RETS]]#0
+  util.optimization_barrier %results#0 : !stream.resource<constant>
+  // CHECK: util.optimization_barrier %[[CST_RETS]]#1
+  util.optimization_barrier %results#1 : !stream.resource<constant>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %results#2 : !stream.resource<transient>
   return
 }
 
@@ -56,14 +56,14 @@
 func.func @explicitAllocs(%size: index) {
   // CHECK: %[[ALLOC:.+]] = stream.resource.alloc : !stream.resource<external>{%[[SIZE]]}
   %alloc = stream.resource.alloc : !stream.resource<external>{%size}
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%alloc) : !stream.resource<external>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %alloc : !stream.resource<external>
 
   %c0 = arith.constant 0 : index
   // CHECK: %[[EMPTY:.+]] = stream.resource.alloc : !stream.resource<transient>{%c0}
   %empty = stream.resource.alloc : !stream.resource<transient>{%c0}
-  // CHECK: util.do_not_optimize(%[[EMPTY]])
-  util.do_not_optimize(%empty) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[EMPTY]]
+  util.optimization_barrier %empty : !stream.resource<transient>
   return
 }
 
@@ -80,8 +80,8 @@
     stream.yield %capture : !stream.resource<transient>{%size}
   // CHECK-NEXT: } => !stream.timepoint
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -116,8 +116,8 @@
     %0 = stream.async.fill %c255_i32, %capture[%c0 to %c128 for %c128] : i32 -> %capture as !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -142,12 +142,12 @@
     %1 = stream.async.splat %c255_i32 : i32 -> !stream.resource<transient>{%size1}
     stream.yield %0, %1 : !stream.resource<transient>{%size0}, !stream.resource<transient>{%size1}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[TIMEPOINT]])
-  util.do_not_optimize(%result_timepoint) : !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[ALLOC_RETS]]#0)
-  util.do_not_optimize(%results#0) : !stream.resource<transient>
-  // CHECK: util.do_not_optimize(%[[ALLOC_RETS]]#1)
-  util.do_not_optimize(%results#1) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[TIMEPOINT]]
+  util.optimization_barrier %result_timepoint : !stream.timepoint
+  // CHECK: util.optimization_barrier %[[ALLOC_RETS]]#0
+  util.optimization_barrier %results#0 : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC_RETS]]#1
+  util.optimization_barrier %results#1 : !stream.resource<transient>
   return
 }
 
@@ -211,10 +211,10 @@
     }
     stream.yield %0#0, %0#1 : !stream.resource<transient>{%size}, !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%results#0) : !stream.resource<transient>
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%results#1) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %results#0 : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %results#1 : !stream.resource<transient>
   return
 }
 
@@ -231,8 +231,8 @@
     %0 = stream.async.splat %c255_i32 : i32 -> !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -251,8 +251,8 @@
     %0 = stream.async.clone %capture : !stream.resource<transient>{%size} -> !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -277,8 +277,8 @@
     %0 = stream.async.slice %capture[%c16 to %c144] : !stream.resource<transient>{%size} -> !stream.resource<transient>{%c128}
     stream.yield %0 : !stream.resource<transient>{%c128}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -297,8 +297,8 @@
     %0 = stream.async.fill %c255_i32, %capture[%c16 to %c144 for %c128] : i32 -> %capture as !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -324,8 +324,8 @@
     %0 = stream.async.update %captured_update, %captured_operand[%c16 to %c144] : !stream.resource<external>{%c128} -> %captured_operand as !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -348,8 +348,8 @@
     %0 = stream.async.copy %captured_source[%c16 to %c144], %captured_target[%c16 to %c144], %c128 : !stream.resource<external>{%size} -> %captured_operand as !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[TARGET]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[TARGET]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -370,8 +370,8 @@
     %0 = stream.async.transfer %capture : !stream.resource<transient>{%size} -> !stream.resource<transient>{%size}
     stream.yield %0 : !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%result) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %result : !stream.resource<transient>
   return
 }
 
@@ -394,12 +394,12 @@
     %0:2 = stream.async.dispatch @executable::@dispatch[%c1, %c1, %c1](%capture, %c4) : (!stream.resource<transient>{%size}, index) -> (%capture{%size}, !stream.resource<transient>{%size})
     stream.yield %0#0, %0#1 : !stream.resource<transient>{%size}, !stream.resource<transient>{%size}
   } => !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[TIMEPOINT]])
-  util.do_not_optimize(%result_timepoint) : !stream.timepoint
-  // CHECK: util.do_not_optimize(%[[OPERAND]])
-  util.do_not_optimize(%results#0) : !stream.resource<transient>
-  // CHECK: util.do_not_optimize(%[[ALLOC]])
-  util.do_not_optimize(%results#1) : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[TIMEPOINT]]
+  util.optimization_barrier %result_timepoint : !stream.timepoint
+  // CHECK: util.optimization_barrier %[[OPERAND]]
+  util.optimization_barrier %results#0 : !stream.resource<transient>
+  // CHECK: util.optimization_barrier %[[ALLOC]]
+  util.optimization_barrier %results#1 : !stream.resource<transient>
   return
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
index ac940e1..f8edc0f 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_dispatches.mlir
@@ -25,16 +25,16 @@
       // CHECK: %[[C:.+]] = tensor.extract %[[LUT_I1]][%[[SITE]], %c0]
       // CHECK: %[[D:.+]] = tensor.extract %[[LUT_I1]][%[[SITE]], %c1]
 
-      // CHECK-NEXT: util.do_not_optimize(%[[BINDING]]) : !stream.binding
-      util.do_not_optimize(%binding) : !stream.binding
-      // CHECK-NEXT: util.do_not_optimize(%[[A]]) : i32
-      util.do_not_optimize(%a) : i32
-      // CHECK-NEXT: util.do_not_optimize(%[[B]]) : index
-      util.do_not_optimize(%b) : index
-      // CHECK-NEXT: util.do_not_optimize(%[[C]]) : i1
-      util.do_not_optimize(%c) : i1
-      // CHECK-NEXT: util.do_not_optimize(%[[D]]) : i1
-      util.do_not_optimize(%d) : i1
+      // CHECK-NEXT: util.optimization_barrier %[[BINDING]] : !stream.binding
+      util.optimization_barrier %binding : !stream.binding
+      // CHECK-NEXT: util.optimization_barrier %[[A]] : i32
+      util.optimization_barrier %a : i32
+      // CHECK-NEXT: util.optimization_barrier %[[B]] : index
+      util.optimization_barrier %b : index
+      // CHECK-NEXT: util.optimization_barrier %[[C]] : i1
+      util.optimization_barrier %c : i1
+      // CHECK-NEXT: util.optimization_barrier %[[D]] : i1
+      util.optimization_barrier %d : i1
       return
     }
   }
diff --git a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
index a944155..0913855 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Conversion/ConversionPatterns.cpp
@@ -30,8 +30,9 @@
 void populateUtilConversionPatterns(MLIRContext *context,
                                     TypeConverter &typeConverter,
                                     RewritePatternSet &patterns) {
-  patterns.insert<GenericConvertTypesPattern<IREE::Util::DoNotOptimizeOp>>(
-      typeConverter, context);
+  patterns
+      .insert<GenericConvertTypesPattern<IREE::Util::OptimizationBarrierOp>>(
+          typeConverter, context);
 
   typeConverter.addConversion([&](IREE::Util::PtrType type,
                                   SmallVectorImpl<Type> &results) {
@@ -60,8 +61,8 @@
                                     ConversionTarget &conversionTarget,
                                     TypeConverter &typeConverter,
                                     RewritePatternSet &patterns) {
-  addGenericLegalOp<IREE::Util::DoNotOptimizeOp>(conversionTarget,
-                                                 typeConverter);
+  addGenericLegalOp<IREE::Util::OptimizationBarrierOp>(conversionTarget,
+                                                       typeConverter);
   addGenericLegalOp<IREE::Util::ListCreateOp>(conversionTarget, typeConverter);
   addGenericLegalOp<IREE::Util::ListGetOp>(conversionTarget, typeConverter);
   addGenericLegalOp<IREE::Util::ListSetOp>(conversionTarget, typeConverter);
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
index e646c7e..f073eb4 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOpFolders.cpp
@@ -416,7 +416,8 @@
                                 PatternRewriter &rewriter) const override {
     auto stdConst =
         rewriter.create<arith::ConstantOp>(op.getLoc(), op.getValue());
-    rewriter.replaceOpWithNewOp<DoNotOptimizeOp>(op, stdConst.getResult());
+    rewriter.replaceOpWithNewOp<OptimizationBarrierOp>(op,
+                                                       stdConst.getResult());
     return success();
   }
 };
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
index 11ac0df..d1a71ad 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.cpp
@@ -606,48 +606,18 @@
 namespace Util {
 
 //===----------------------------------------------------------------------===//
-// util.do_not_optimize
+// util.optimization_barrier
 //===----------------------------------------------------------------------===//
 
-void DoNotOptimizeOp::build(OpBuilder &builder, OperationState &state,
-                            ValueRange operands,
-                            ArrayRef<NamedAttribute> attributes) {
+void OptimizationBarrierOp::build(OpBuilder &builder, OperationState &state,
+                                  ValueRange operands,
+                                  ArrayRef<NamedAttribute> attributes) {
   state.addOperands(operands);
   state.addTypes(llvm::to_vector<2>(operands.getTypes()));
   state.addAttributes(attributes);
 }
 
-ParseResult DoNotOptimizeOp::parse(OpAsmParser &parser, OperationState &state) {
-  SmallVector<OpAsmParser::UnresolvedOperand, 2> args;
-  // Operands and results have the same types.
-  auto &operandTypes = state.types;
-
-  if (failed(parser.parseLParen()) || failed(parser.parseOperandList(args)) ||
-      failed(parser.parseRParen()) ||
-      failed(parser.parseOptionalAttrDict(state.attributes)) ||
-      failed(parser.parseOptionalColonTypeList(state.types)) ||
-      failed(parser.resolveOperands(
-          args, operandTypes, parser.getCurrentLocation(), state.operands))) {
-    return failure();
-  }
-
-  return success();
-}
-
-void DoNotOptimizeOp::print(OpAsmPrinter &p) {
-  Operation *op = getOperation();
-  p << "(";
-  p.printOperands(op->getOperands());
-  p << ")";
-  p.printOptionalAttrDict(op->getAttrs());
-
-  if (op->getNumOperands() != 0) {
-    p << " : ";
-    interleaveComma(getOperandTypes(), p);
-  }
-}
-
-LogicalResult DoNotOptimizeOp::verify() {
+LogicalResult OptimizationBarrierOp::verify() {
   Operation *op = getOperation();
   if (op->getNumOperands() != op->getNumResults()) {
     return op->emitOpError()
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
index bcfd4ef..7e67fd7 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilOps.td
@@ -305,25 +305,31 @@
 // Compiler hints
 //===----------------------------------------------------------------------===//
 
-def Util_DoNotOptimizeOp : Util_Op<"do_not_optimize", [
+def Util_OptimizationBarrierOp : Util_Op<"optimization_barrier", [
   SameOperandsAndResultType,
 ]> {
-  let summary = "Prevents compiler optimizations of a value.";
+  let summary = "Prevents compiler optimizations across a value.";
   let description = [{
-    Wraps any operands in an unoptimizable identity. This operation is declared
-    as having side effects, so no compiler optimizations will be able to reason
-    about it. This prevents its results from being folded. It will be dropped as
-    the final step in compilation.
+    Wraps its operands in an unoptimizable identity to prevent the results
+    from being folded. The op is dropped as the final step in compilation and
+    has no effect at runtime.
   }];
-  let arguments = (ins Variadic<AnyType>:$arguments);
+  let arguments = (ins Variadic<AnyType>:$operands);
   let results = (outs Variadic<AnyType>:$results);
-  let hasVerifier = 1;
+
+  let assemblyFormat = [{
+    attr-dict
+    ($operands^ `:` type($operands))?
+  }];
+
   let builders = [
     OpBuilder<(ins
       "ValueRange":$operands,
       CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes
     )>,
   ];
+
+  let hasVerifier = 1;
 }
 
 def Util_UnfoldableConstantOp : Util_Op<"unfoldable_constant"> {
@@ -331,7 +337,7 @@
   let description = [{
     Similar to a std.constant, but is declared as having a side effect and has
     no folder. This is really just syntactic sugar as it is canonicalized to a
-    std.constant wrapped in an util.do_not_optimize.
+    std.constant wrapped in a util.optimization_barrier.
   }];
 
   let arguments = (ins AnyAttr:$value);
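
Note that switching to a declarative `assemblyFormat` also changes the printed form: the parentheses are gone and `attr-dict` now prints before the operands. A before/after sketch, taken from the updated parse/print tests:

```mlir
// Old custom syntax:
//   %1 = util.do_not_optimize(%arg0) {some_unit} : tensor<i32>
// New declarative syntax:
%1 = util.optimization_barrier {some_unit} %arg0 : tensor<i32>
```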
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
index ce3a5fc..94d700f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/buffer_folding.mlir
@@ -82,13 +82,13 @@
     %buffer_size_inner = util.buffer.size %buffer : !util.buffer
     // CHECK: util.buffer.load %[[BUFFER]]{{.+}} : !util.buffer{%[[BUFFER_SIZE_INNER]]}
     %inner = util.buffer.load %buffer[%i] : !util.buffer{%buffer_size_inner} -> i8
-    util.do_not_optimize(%inner) : i8
+    util.optimization_barrier %inner : i8
   }
   // CHECK: %[[BUFFER_SIZE_OUTER:.+]] = util.buffer.size %[[BUFFER]]
   %buffer_size_outer = util.buffer.size %buffer : !util.buffer
   // CHECK: util.buffer.load %[[BUFFER]]{{.+}} : !util.buffer{%[[BUFFER_SIZE_OUTER]]}
   %outer = util.buffer.load %buffer[%c128] : !util.buffer{%buffer_size_outer} -> i8
-  util.do_not_optimize(%outer) : i8
+  util.optimization_barrier %outer : i8
   return
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
index 53865be..e78803f 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_folding.mlir
@@ -4,8 +4,8 @@
 func.func @no_fold_constant() -> (i32) {
   // CHECK: constant 1 : i32
   %0 = arith.constant 1 : i32
-  // CHECK: util.do_not_optimize
-  %1 = "util.do_not_optimize"(%0) : (i32) -> i32
+  // CHECK: util.optimization_barrier
+  %1 = "util.optimization_barrier"(%0) : (i32) -> i32
   return %1 : i32
 }
 
@@ -15,8 +15,8 @@
 func.func @no_fold_add() -> (i32) {
   // CHECK-NEXT: %[[C1:.+]] = vm.const.i32 1
   %c1 = vm.const.i32 1
-  // CHECK-NEXT: %[[R1:.+]] = util.do_not_optimize(%[[C1]])
-  %0 = util.do_not_optimize(%c1) : i32
+  // CHECK-NEXT: %[[R1:.+]] = util.optimization_barrier %[[C1]]
+  %0 = util.optimization_barrier %c1 : i32
   // CHECK-NEXT: %[[R2:.+]] = vm.add.i32 %[[R1]], %[[R1]]
   %1 = vm.add.i32 %0, %0 : i32
   // CHECK-NEXT: return %[[R2]]
@@ -25,7 +25,7 @@
 
 // -----
 
-// Exists to check that the above succeeds because of do_not_optimize
+// Exists to check that the same add folds when there is no barrier.
 // CHECK-LABEL: @fold_add
 func.func @fold_add() -> (i32) {
   // CHECK-NEXT: %[[C2:.+]] = vm.const.i32 2
@@ -39,7 +39,7 @@
 
 func.func @result_operand_count_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
   // expected-error@+1 {{must have same number of operands and results}}
-  %1 = "util.do_not_optimize"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+  %1 = "util.optimization_barrier"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
   return
 }
 
@@ -47,7 +47,7 @@
 
 func.func @result_operand_type_mismatch(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
   // expected-error@+1 {{must have same operand and result types, but they differ at index 1}}
-  %1:2 = "util.do_not_optimize"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> (tensor<i32>, memref<i32>)
+  %1:2 = "util.optimization_barrier"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> (tensor<i32>, memref<i32>)
   return
 }
 
@@ -56,7 +56,7 @@
 // CHECK-LABEL: @canonicalize_unfoldable_constant
 func.func @canonicalize_unfoldable_constant() -> i32 {
   // CHECK-NEXT: %[[C:.+]] = arith.constant 42 : i32
-  // CHECK-NEXT: %[[R:.+]] = util.do_not_optimize(%[[C]]) : i32
+  // CHECK-NEXT: %[[R:.+]] = util.optimization_barrier %[[C]] : i32
   %c42 = util.unfoldable_constant 42 : i32
   // CHECK-NEXT: return %[[R]]
   return %c42 : i32
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
index c9ca8ae..d8e79c5 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/test/hint_ops.mlir
@@ -1,17 +1,17 @@
 // RUN: iree-opt --split-input-file %s | iree-opt --split-input-file | FileCheck %s
 
-// CHECK-LABEL: @parse_print_do_not_optimize
+// CHECK-LABEL: @parse_print_barrier
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9$._-]+]]
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9$._-]+]]
-func.func @parse_print_do_not_optimize(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
-  // CHECK-NEXT: util.do_not_optimize(%[[ARG0]]) : tensor<i32>
-  %1 = util.do_not_optimize(%arg0) : tensor<i32>
+func.func @parse_print_barrier(%arg0 : tensor<i32>, %arg1 : tensor<i32>) {
+  // CHECK-NEXT: util.optimization_barrier %[[ARG0]] : tensor<i32>
+  %1 = util.optimization_barrier %arg0 : tensor<i32>
 
-  // CHECK-NEXT: util.do_not_optimize(%[[ARG0]], %[[ARG1]]) : tensor<i32>, tensor<i32>
-  %2:2 = util.do_not_optimize(%arg0, %arg1) : tensor<i32>, tensor<i32>
+  // CHECK-NEXT: util.optimization_barrier %[[ARG0]], %[[ARG1]] : tensor<i32>, tensor<i32>
+  %2:2 = util.optimization_barrier %arg0, %arg1 : tensor<i32>, tensor<i32>
 
-  // CHECK-NEXT: util.do_not_optimize(%[[ARG0]]) {some_unit} : tensor<i32>
-  %has_attr = util.do_not_optimize(%arg0) {some_unit} : tensor<i32>
+  // CHECK-NEXT: util.optimization_barrier {some_unit} %[[ARG0]] : tensor<i32>
+  %has_attr = util.optimization_barrier {some_unit} %arg0 : tensor<i32>
 
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
index 6853830..9b9142e 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/DropCompilerHints.cpp
@@ -22,7 +22,7 @@
   void runOnOperation() override {
     // We can't use patterns and applyPatternsAndFoldGreedily because that
     // automatically does canonicalization.
-    getOperation()->walk([&](DoNotOptimizeOp op) {
+    getOperation()->walk([&](IREE::Util::OptimizationBarrierOp op) {
       op.replaceAllUsesWith(op.getOperands());
       op.erase();
     });
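
The pass simply forwards each barrier's operands to its uses and erases the op, as the tests below check; conceptually:

```mlir
// Before --iree-util-drop-compiler-hints:
%c1 = arith.constant 1 : i32
%0 = util.optimization_barrier %c1 : i32
return %0 : i32
// After: the barrier is erased and its uses are rewired to %c1.
%c1 = arith.constant 1 : i32
return %c1 : i32
```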
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp b/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
index f471884..e757c2b 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/SimplifyGlobalAccesses.cpp
@@ -195,7 +195,7 @@
 static bool rearrangeBlockGlobalAccesses(
     Block &block, DenseSet<StringRef> &immutableGlobals) {
   // Gather sequences of operations that are safe to reorder.
-  // Certain ops - like calls/do_not_optimize/etc - prevent us from moving any
+  // Certain ops - like calls/barriers/etc - prevent us from moving any
   // global operations across them.
   //
   // From each sequence we produce [symbol_name, [op, op, op, ...]] buckets.
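
To illustrate the fence behavior the comment describes (a sketch only; the actual bucketing logic is more involved): accesses to the same global on opposite sides of a barrier land in different sequences and are never grouped or moved across it:

```mlir
// The barrier ends the reorderable sequence, so these two loads of
// @globalA are not coalesced with each other or reordered past it.
%0 = util.global.load @globalA : i32
util.optimization_barrier %0 : i32
%1 = util.global.load @globalA : i32
```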
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
index fee2739..319bd85 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/drop_compiler_hints.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-util-drop-compiler-hints %s | FileCheck --implicit-check-not="util.do_not_optimize" %s
+// RUN: iree-opt --split-input-file --iree-util-drop-compiler-hints %s | FileCheck --implicit-check-not="util.optimization_barrier" %s
 
 // This file is used as an example in docs/developing_iree/developer_overview.md.
 // If you move or delete it, please update the documentation accordingly.
@@ -7,7 +7,7 @@
 func.func @constant() -> i32 {
   // CHECK-NEXT: %[[C1:.+]] = arith.constant 1
   %c1 = arith.constant 1 : i32
-  %0 = util.do_not_optimize(%c1) : i32
+  %0 = util.optimization_barrier %c1 : i32
   // CHECK-NEXT: return %[[C1]]
   return %0 : i32
 }
@@ -18,12 +18,12 @@
 func.func @multiple() -> (i32, i32) {
   // CHECK-NEXT: %[[C1:.+]] = arith.constant 1
   %c1 = arith.constant 1 : i32
-  %0 = util.do_not_optimize(%c1) : i32
-  %1 = util.do_not_optimize(%0) : i32
+  %0 = util.optimization_barrier %c1 : i32
+  %1 = util.optimization_barrier %0 : i32
   // CHECK-NEXT: %[[C2:.+]] = arith.constant 2
   %c2 = arith.constant 2 : i32
-  %2 = util.do_not_optimize(%1) : i32
-  %3 = util.do_not_optimize(%c2) : i32
+  %2 = util.optimization_barrier %1 : i32
+  %3 = util.optimization_barrier %c2 : i32
   // CHECK-NEXT: return %[[C1]], %[[C2]]
   return %2, %3 : i32, i32
 }
@@ -36,7 +36,7 @@
   %c1 = arith.constant 1 : i32
   // CHECK-NEXT: %[[C2:.+]] = arith.constant 2
   %c2 = arith.constant 2 : i32
-  %0, %1 = util.do_not_optimize(%c1, %c2) : i32, i32
+  %0, %1 = util.optimization_barrier %c1, %c2 : i32, i32
   // CHECK-NEXT: return %[[C1]], %[[C2]]
   return %0, %1 : i32, i32
 }
@@ -47,7 +47,7 @@
 func.func @no_fold_add() -> (i32) {
   // CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : i32
   %c1 = arith.constant 1 : i32
-  %0 = util.do_not_optimize(%c1) : i32
+  %0 = util.optimization_barrier %c1 : i32
   // CHECK-NEXT: %[[R:.+]] = arith.addi %[[C1]], %[[C1]]
   %1 = arith.addi %0, %0 : i32
   // CHECK-NEXT: return %[[R]]
@@ -66,7 +66,7 @@
       func.func @constant() -> i32 {
         // CHECK-NEXT: %[[C1:.+]] = arith.constant 1
         %c1 = arith.constant 1 : i32
-        %0 = util.do_not_optimize(%c1) : i32
+        %0 = util.optimization_barrier %c1 : i32
         // CHECK-NEXT: return %[[C1]]
         return %0 : i32
       }
diff --git a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
index c6be73b..81cbff3 100644
--- a/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
+++ b/compiler/src/iree/compiler/Dialect/Util/Transforms/test/propagate_subranges.mlir
@@ -18,8 +18,8 @@
   // CHECK-NEXT: %[[LENGTH:.+]] = util.global.load @constantGlobal__length : index
   // CHECK: %[[SUBRANGE:.+]] = util.buffer.subspan %[[RESOURCE]][%[[OFFSET]]] : !util.buffer{%[[STORAGE_SIZE]]} -> !util.buffer{%[[LENGTH]]}
   %0 = util.global.load @constantGlobal : !util.buffer
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBRANGE]])
-  util.do_not_optimize(%0) : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[SUBRANGE]]
+  util.optimization_barrier %0 : !util.buffer
   return
 }
 
@@ -59,10 +59,10 @@
   // CHECK-NEXT: %[[SUBRANGE0:.+]] = util.buffer.subspan %[[RESOURCE0]][%[[OFFSET0]]] : !util.buffer{%[[STORAGE_SIZE0]]} -> !util.buffer{%[[LENGTH0]]}
   // CHECK-NEXT: %[[SUBRANGE1:.+]] = util.buffer.subspan %[[RESOURCE1]][%[[OFFSET1]]] : !util.buffer{%[[STORAGE_SIZE1]]} -> !util.buffer{%[[LENGTH1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBRANGE0]])
-  util.do_not_optimize(%resource0) : !util.buffer
-  // CHECK-NEXT: util.do_not_optimize(%[[SUBRANGE1]])
-  util.do_not_optimize(%resource1) : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[SUBRANGE0]]
+  util.optimization_barrier %resource0 : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[SUBRANGE1]]
+  util.optimization_barrier %resource1 : !util.buffer
   return
 }
 
@@ -119,10 +119,10 @@
   // CHECK-NEXT: %[[RET_SUBRANGE0:.+]] = util.buffer.subspan %[[RET]]#0[%[[RET]]#2] : !util.buffer{%[[RET]]#1} -> !util.buffer{%[[RET]]#3}
   // CHECK-NEXT: %[[RET_SUBRANGE1:.+]] = util.buffer.subspan %[[RET]]#4[%[[RET]]#6] : !util.buffer{%[[RET]]#5} -> !util.buffer{%[[RET]]#7}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_SUBRANGE0]]) : !util.buffer
-  util.do_not_optimize(%0#0) : !util.buffer
-  // CHECK-NEXT: util.do_not_optimize(%[[RET_SUBRANGE1]]) : !util.buffer
-  util.do_not_optimize(%0#1) : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[RET_SUBRANGE0]] : !util.buffer
+  util.optimization_barrier %0#0 : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[RET_SUBRANGE1]] : !util.buffer
+  util.optimization_barrier %0#1 : !util.buffer
 
   return
 }
@@ -192,8 +192,8 @@
   %ret1 = call @callee(%ret0_subspan) : (!util.buffer) -> (!util.buffer)
   // CHECK: %[[RET1_SUBRANGE:.+]] = util.buffer.subspan %[[RET1]]#0[%[[RET1]]#2] : !util.buffer{%[[RET1]]#1} -> !util.buffer{%[[RET1]]#3}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[RET1_SUBRANGE]]) : !util.buffer
-  util.do_not_optimize(%ret1) : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[RET1_SUBRANGE]] : !util.buffer
+  util.optimization_barrier %ret1 : !util.buffer
 
   return
 }
@@ -225,10 +225,10 @@
   // CHECK-NEXT: %[[BB1_SUBRANGE0:.+]] = util.buffer.subspan %[[BB1_RESOURCE0]][%[[BB1_OFFSET0]]] : !util.buffer{%[[BB1_STORAGE_SIZE0]]} -> !util.buffer{%[[BB1_LENGTH0]]}
   // CHECK-NEXT: %[[BB1_SUBRANGE1:.+]] = util.buffer.subspan %[[BB1_RESOURCE1]][%[[BB1_OFFSET1]]] : !util.buffer{%[[BB1_STORAGE_SIZE1]]} -> !util.buffer{%[[BB1_LENGTH1]]}
 
-  // CHECK-NEXT: util.do_not_optimize(%[[BB1_SUBRANGE0]])
-  util.do_not_optimize(%bb1_resource0) : !util.buffer
-  // CHECK-NEXT: util.do_not_optimize(%[[BB1_SUBRANGE1]])
-  util.do_not_optimize(%bb1_resource1) : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[BB1_SUBRANGE0]]
+  util.optimization_barrier %bb1_resource0 : !util.buffer
+  // CHECK-NEXT: util.optimization_barrier %[[BB1_SUBRANGE1]]
+  util.optimization_barrier %bb1_resource1 : !util.buffer
 
   return
 }
diff --git a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
index e65c40d..02e1bed 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
+++ b/compiler/src/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
@@ -6,8 +6,8 @@
     // CHECK: %[[REF:.+]] = "emitc.variable"() {value = #emitc.opaque<"">} : () -> !emitc.opaque<"iree_vm_ref_t">
     // CHECK: %[[REFPTR:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
     %list = vm.list.alloc %arg0 : (i32) -> !vm.list<i32>
-    %list_dno = util.do_not_optimize(%list) : !vm.list<i32>
-    // CHECK: util.do_not_optimize(%[[REFPTR]]) : !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
+    %list_dno = util.optimization_barrier %list : !vm.list<i32>
+    // CHECK: util.optimization_barrier %[[REFPTR]] : !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
     vm.return
   }
 
@@ -18,8 +18,8 @@
     // CHECK: %[[REFPTR:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
     %size = vm.list.size %list : (!vm.list<i32>) -> i32
     // CHECK: %[[SIZE:.+]] = emitc.call "iree_vm_list_size"(%{{.+}})
-    %size_dno = util.do_not_optimize(%size) : i32
-    // CHECK: util.do_not_optimize(%[[SIZE]]) : i32
+    %size_dno = util.optimization_barrier %size : i32
+    // CHECK: util.optimization_barrier %[[SIZE]] : i32
     vm.return
   }
 }
@@ -34,8 +34,8 @@
     // CHECK: %[[REF:.+]] = "emitc.variable"() {value = #emitc.opaque<"">} : () -> !emitc.opaque<"iree_vm_ref_t">
     // CHECK: %[[REFPTR:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
     %buffer = vm.const.ref.rodata @byte_buffer : !vm.buffer
-    %buffer_dno = util.do_not_optimize(%buffer) : !vm.buffer
-    // CHECK: util.do_not_optimize(%[[REFPTR]]) : !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
+    %buffer_dno = util.optimization_barrier %buffer : !vm.buffer
+    // CHECK: util.optimization_barrier %[[REFPTR]] : !emitc.ptr<!emitc.opaque<"iree_vm_ref_t">>
     vm.return
   }
 }
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
index 0ba032e..7f38e52 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
@@ -181,7 +181,7 @@
   RewritePatternSet patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
-  target.addLegalOp<IREE::Util::DoNotOptimizeOp>();
+  target.addLegalOp<IREE::Util::OptimizationBarrierOp>();
 
   // Add all VM canonicalization patterns and mark pseudo-ops illegal.
   auto *context = moduleOp.getContext();
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
index 6056b92..7c30edb 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
@@ -297,7 +297,7 @@
   RewritePatternSet patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
-  target.addLegalOp<IREE::Util::DoNotOptimizeOp>();
+  target.addLegalOp<IREE::Util::OptimizationBarrierOp>();
 
   // Add all VM canonicalization patterns and mark pseudo-ops illegal.
   auto *context = moduleOp.getContext();
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/materialize_constants.mlir b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/materialize_constants.mlir
index 9363d98..efbc483 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/materialize_constants.mlir
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/test/materialize_constants.mlir
@@ -35,13 +35,13 @@
 func.func private @constant_user() {
   // CHECK: %[[FOO_LOADED:.+]] = util.global.load @__constant_foo
   %value_0 = hal.executable.constant.load "foo" : i32
-  // CHECK: util.do_not_optimize(%[[FOO_LOADED]])
-  util.do_not_optimize(%value_0) : i32
+  // CHECK: util.optimization_barrier %[[FOO_LOADED]]
+  util.optimization_barrier %value_0 : i32
 
   // CHECK: %[[BAR_LOADED:.+]] = util.global.load @__constant_bar
   %value_1 = hal.executable.constant.load "bar" : i32
-  // CHECK: util.do_not_optimize(%[[BAR_LOADED]])
-  util.do_not_optimize(%value_1) : i32
+  // CHECK: util.optimization_barrier %[[BAR_LOADED]]
+  util.optimization_barrier %value_1 : i32
 
   return
 }
diff --git a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
index 493052a..a826f1b 100644
--- a/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
+++ b/compiler/src/iree/compiler/Modules/HAL/Inline/Transforms/test/inline_executables.mlir
@@ -55,7 +55,7 @@
 
         // Test for global constants:
         %global_constant = util.global.load @global_constant : !util.buffer
-        util.do_not_optimize(%global_constant) : !util.buffer
+        util.optimization_barrier %global_constant : !util.buffer
 
 
         %c4 = arith.constant 4 : index
@@ -105,7 +105,7 @@
 
 // Globals get carried across:
 // CHECK: %[[GLOBAL_CONSTANT:.+]] = util.global.load @global_constant_0 : !util.buffer
-// CHECK: util.do_not_optimize(%[[GLOBAL_CONSTANT]])
+// CHECK: util.optimization_barrier %[[GLOBAL_CONSTANT]]
 
 // CHECK: %[[X_IDX:.+]] = arith.index_cast %[[X_I32]]
 // CHECK: scf.for %[[ELEMENT_INDEX:.+]] = %c0 to %[[X_IDX]]
diff --git a/docs/developers/developing_iree/testing_guide.md b/docs/developers/developing_iree/testing_guide.md
index b5cdb08..d20b98a 100644
--- a/docs/developers/developing_iree/testing_guide.md
+++ b/docs/developers/developing_iree/testing_guide.md
@@ -286,12 +286,10 @@
 Test cases are created in gtest for each public function exported by the module.
 
 Note the use of `util.unfoldable_constant` to specify test constants. If we were
-to use a regular constant, the compiler would "helpfully" fold away everything
-at compile time and our test would not actually test the runtime.
-`unfoldable_constant` hides the value of the constant from the compiler so it
-cannot use it at compile time. To hide an arbitrary SSA-value, you can use
-`util.do_not_optimize`. This wraps any value in an unoptimizable identity
-function.
+to use a regular constant, the compiler would fold away everything at compile
+time and our test would not actually test the runtime. `unfoldable_constant`
+adds a barrier that prevents folding. To prevent folding/constant propagation
+on an arbitrary SSA value, you can use `util.optimization_barrier`.
 
 Next we use this input constant to exercise the runtime feature under test (in
 this case, just a single floor operation). Finally, we use a check dialect
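
A compact sketch of such a test following the guide's pattern; the `mhlo.floor` and `check.expect_almost_eq_const` ops here are assumptions based on the e2e tests the guide refers to:

```mlir
func.func @floor() {
  // Hidden from the compiler, so the floor actually runs at runtime.
  %input = util.unfoldable_constant dense<1.5> : tensor<f32>
  %result = "mhlo.floor"(%input) : (tensor<f32>) -> tensor<f32>
  check.expect_almost_eq_const(%result, dense<1.0> : tensor<f32>) : tensor<f32>
  return
}
```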
diff --git a/runtime/src/iree/vm/test/arithmetic_ops.mlir b/runtime/src/iree/vm/test/arithmetic_ops.mlir
index 60bb59d..9046b6b 100644
--- a/runtime/src/iree/vm/test/arithmetic_ops.mlir
+++ b/runtime/src/iree/vm/test/arithmetic_ops.mlir
@@ -7,7 +7,7 @@
   vm.export @test_add_i32
   vm.func @test_add_i32() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.add.i32 %c1dno, %c1dno : i32
     %c2 = vm.const.i32 2
     vm.check.eq %v, %c2, "1+1=2" : i32
@@ -17,9 +17,9 @@
   vm.export @test_sub_i32
   vm.func @test_sub_i32() {
     %c1 = vm.const.i32 3
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.sub.i32 %c1dno, %c2dno : i32
     %c3 = vm.const.i32 1
     vm.check.eq %v, %c3, "3-2=1" : i32
@@ -29,7 +29,7 @@
   vm.export @test_mul_i32
   vm.func @test_mul_i32() {
     %c1 = vm.const.i32 2
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.mul.i32 %c1dno, %c1dno : i32
     %c2 = vm.const.i32 4
     vm.check.eq %v, %c2, "2*2=4" : i32
@@ -39,9 +39,9 @@
   vm.export @test_div_i32s
   vm.func @test_div_i32s() {
     %c1 = vm.const.i32 4
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 -2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.div.i32.s %c1dno, %c2dno : i32
     %c3 = vm.const.i32 -2
     vm.check.eq %v, %c3, "4/-2=-2" : i32
@@ -51,9 +51,9 @@
   vm.export @test_div_i32u
   vm.func @test_div_i32u() {
     %c1 = vm.const.i32 4
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.div.i32.u %c1dno, %c2dno : i32
     %c3 = vm.const.i32 2
     vm.check.eq %v, %c3, "4/2=2" : i32
@@ -63,9 +63,9 @@
   vm.export @test_rem_i32s
   vm.func @test_rem_i32s() {
     %c1 = vm.const.i32 -3
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 -2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.rem.i32.s %c1dno, %c2dno : i32
     %c3 = vm.const.i32 -1
     vm.check.eq %v, %c3, "-3%-2=-1" : i32
@@ -75,9 +75,9 @@
   vm.export @test_rem_i32u
   vm.func @test_rem_i32u() {
     %c1 = vm.const.i32 3
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.rem.i32.u %c1dno, %c2dno : i32
     %c3 = vm.const.i32 1
     vm.check.eq %v, %c3, "3%2=1" : i32
@@ -87,11 +87,11 @@
   vm.export @test_fma_i32
   vm.func @test_fma_i32() {
     %c2 = vm.const.i32 2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %c3 = vm.const.i32 3
-    %c3dno = util.do_not_optimize(%c3) : i32
+    %c3dno = util.optimization_barrier %c3 : i32
     %c5 = vm.const.i32 5
-    %c5dno = util.do_not_optimize(%c5) : i32
+    %c5dno = util.optimization_barrier %c5 : i32
     %v = vm.fma.i32 %c2dno, %c3dno, %c5dno : i32
     %c11 = vm.const.i32 11
     vm.check.eq %v, %c11, "2*3+5=11" : i32
@@ -101,7 +101,7 @@
   vm.export @test_abs_i32
   vm.func @test_abs_i32() {
     %c1 = vm.const.i32 -1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.abs.i32 %c1dno : i32
     %c2 = vm.const.i32 1
     vm.check.eq %v, %c2, "abs(-1)=1" : i32
@@ -111,7 +111,7 @@
   vm.export @test_not_i32
   vm.func @test_not_i32() {
     %c1 = vm.const.i32 0
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.not.i32 %c1dno : i32
     %c2 = vm.const.i32 -1
     vm.check.eq %v, %c2, "~0=-1" : i32
@@ -121,9 +121,9 @@
   vm.export @test_and_i32
   vm.func @test_and_i32() {
     %c1 = vm.const.i32 5
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 3
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.and.i32 %c1dno, %c2dno : i32
     %c3 = vm.const.i32 1
     vm.check.eq %v, %c3, "5&3=1" : i32
@@ -133,9 +133,9 @@
   vm.export @test_or_i32
   vm.func @test_or_i32() {
     %c1 = vm.const.i32 5
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 3
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.or.i32 %c1dno, %c2dno : i32
     %c3 = vm.const.i32 7
     vm.check.eq %v, %c3, "5|3=7" : i32
@@ -145,9 +145,9 @@
   vm.export @test_xor_i32
   vm.func @test_xor_i32() {
     %c1 = vm.const.i32 5
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 3
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     %v = vm.xor.i32 %c1dno, %c2dno : i32
     %c3 = vm.const.i32 6
     vm.check.eq %v, %c3, "5^3=6" : i32
@@ -157,7 +157,7 @@
   vm.export @test_ctlz_i32_const_zero
   vm.func @test_ctlz_i32_const_zero() {
     %c = vm.const.i32 0
-    %cdno = util.do_not_optimize(%c) : i32
+    %cdno = util.optimization_barrier %c : i32
     %actual = vm.ctlz.i32 %cdno : i32
     %expected = vm.const.i32 32
     vm.check.eq %actual, %expected, "ctlz(0)=32" : i32
@@ -167,7 +167,7 @@
   vm.export @test_ctlz_i32_const_1
   vm.func @test_ctlz_i32_const_1() {
     %c = vm.const.i32 1
-    %cdno = util.do_not_optimize(%c) : i32
+    %cdno = util.optimization_barrier %c : i32
     %actual = vm.ctlz.i32 %cdno : i32
     %expected = vm.const.i32 31
     vm.check.eq %actual, %expected, "ctlz(1)=31" : i32
@@ -177,7 +177,7 @@
   vm.export @test_ctlz_i32_const_ffffffff
   vm.func @test_ctlz_i32_const_ffffffff() {
     %c = vm.const.i32 0xFFFFFFFF
-    %cdno = util.do_not_optimize(%c) : i32
+    %cdno = util.optimization_barrier %c : i32
     %actual = vm.ctlz.i32 %cdno : i32
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "ctlz(0xFFFFFFFF)=0" : i32
diff --git a/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir b/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir
index f23cf94..ce478fb 100644
--- a/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir
+++ b/runtime/src/iree/vm/test/arithmetic_ops_f32.mlir
@@ -7,7 +7,7 @@
   vm.export @test_add_f32
   vm.func @test_add_f32() {
     %c1 = vm.const.f32 1.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.add.f32 %c1dno, %c1dno : f32
     %c2 = vm.const.f32 3.0
     vm.check.eq %v, %c2, "1.5+1.5=3" : f32
@@ -17,9 +17,9 @@
   vm.export @test_sub_f32
   vm.func @test_sub_f32() {
     %c1 = vm.const.f32 3.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %c2 = vm.const.f32 2.5
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %v = vm.sub.f32 %c1dno, %c2dno : f32
     %c3 = vm.const.f32 0.5
     vm.check.eq %v, %c3, "3.0-2.5=0.5" : f32
@@ -29,7 +29,7 @@
   vm.export @test_mul_f32
   vm.func @test_mul_f32() {
     %c1 = vm.const.f32 2.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.mul.f32 %c1dno, %c1dno : f32
     %c2 = vm.const.f32 6.25
     vm.check.eq %v, %c2, "2.5*2.5=6.25" : f32
@@ -39,9 +39,9 @@
   vm.export @test_div_f32
   vm.func @test_div_f32() {
     %c1 = vm.const.f32 4.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %c2 = vm.const.f32 -2.0
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %v = vm.div.f32 %c1dno, %c2dno : f32
     %c3 = vm.const.f32 -2.0
     vm.check.eq %v, %c3, "4.0/-2.0=-2.0" : f32
@@ -51,9 +51,9 @@
   vm.export @test_rem_f32
   vm.func @test_rem_f32() {
     %c1 = vm.const.f32 -3.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %c2 = vm.const.f32 -2.0
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %v = vm.rem.f32 %c1dno, %c2dno : f32
     %c3 = vm.const.f32 1.0
     vm.check.eq %v, %c3, "-3.0%-2.0=1.0" : f32
@@ -63,11 +63,11 @@
   vm.export @test_fma_f32
   vm.func @test_fma_f32() {
     %c2 = vm.const.f32 2.0
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %c3 = vm.const.f32 3.0
-    %c3dno = util.do_not_optimize(%c3) : f32
+    %c3dno = util.optimization_barrier %c3 : f32
     %c5 = vm.const.f32 5.0
-    %c5dno = util.do_not_optimize(%c5) : f32
+    %c5dno = util.optimization_barrier %c5 : f32
     %v = vm.fma.f32 %c2dno, %c3dno, %c5dno : f32
     %c11 = vm.const.f32 11.0
     vm.check.eq %v, %c11, "2.0*3.0+5.0=11.0" : f32
@@ -77,7 +77,7 @@
   vm.export @test_abs_f32
   vm.func @test_abs_f32() {
     %c1 = vm.const.f32 -1.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.abs.f32 %c1dno : f32
     %c2 = vm.const.f32 1.0
     vm.check.eq %v, %c2, "abs(-1.0)=1.0" : f32
@@ -87,7 +87,7 @@
   vm.export @test_neg_f32
   vm.func @test_neg_f32() {
     %c1 = vm.const.f32 -1.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.neg.f32 %c1dno : f32
     %c2 = vm.const.f32 1.0
     vm.check.eq %v, %c2, "neg(-1.0)=1.0" : f32
@@ -97,7 +97,7 @@
   vm.export @test_ceil_f32
   vm.func @test_ceil_f32() {
     %c1 = vm.const.f32 1.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.ceil.f32 %c1dno : f32
     %c2 = vm.const.f32 2.0
     vm.check.eq %v, %c2, "ceil(1.5)=2.0" : f32
@@ -107,7 +107,7 @@
   vm.export @test_floor_f32
   vm.func @test_floor_f32() {
     %c1 = vm.const.f32 1.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.floor.f32 %c1dno : f32
     %c2 = vm.const.f32 1.0
     vm.check.eq %v, %c2, "floor(1.5)=1.0" : f32
@@ -117,7 +117,7 @@
   vm.export @test_atan_f32
   vm.func @test_atan_f32() {
     %c1 = vm.const.f32 1.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.atan.f32 %c1dno : f32
     %c2 = vm.const.f32 0.7853981633974483
     vm.check.eq %v, %c2, "atan(1.0)=0.7853981633974483" : f32
@@ -127,9 +127,9 @@
   vm.export @test_atan2_f32
   vm.func @test_atan2_f32() {
     %c1 = vm.const.f32 1.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %c2 = vm.const.f32 0.0
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %v = vm.atan2.f32 %c1dno, %c2dno : f32
     %c3 = vm.const.f32 1.5707963267948966
     vm.check.eq %v, %c3, "atan2(1.0,0.0)=1.5707963267948966" : f32
@@ -139,7 +139,7 @@
   vm.export @test_cos_f32
   vm.func @test_cos_f32() {
     %c1 = vm.const.f32 0.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cos.f32 %c1dno : f32
     %c2 = vm.const.f32 0.8775825618903728
     vm.check.eq %v, %c2, "cos(0.5)=0.8775825618903728" : f32
@@ -149,7 +149,7 @@
   vm.export @test_sin_f32
   vm.func @test_sin_f32() {
     %c1 = vm.const.f32 0.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.sin.f32 %c1dno : f32
     %c2 = vm.const.f32 0.479425538604203
     vm.check.eq %v, %c2, "sin(0.5)=0.479425538604203" : f32
@@ -159,7 +159,7 @@
   vm.export @test_exp_f32
   vm.func @test_exp_f32() {
     %c1 = vm.const.f32 1.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.exp.f32 %c1dno : f32
     %c2 = vm.const.f32 2.718281828459045
     vm.check.eq %v, %c2, "exp(1.0)=2.718281828459045" : f32
@@ -169,7 +169,7 @@
   vm.export @test_exp2_f32
   vm.func @test_exp2_f32() {
     %c1 = vm.const.f32 2.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.exp2.f32 %c1dno : f32
     %c2 = vm.const.f32 4.0
     vm.check.eq %v, %c2, "exp(2.0)=4.0" : f32
@@ -179,7 +179,7 @@
   vm.export @test_expm1_f32
   vm.func @test_expm1_f32() {
     %c1 = vm.const.f32 2.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.expm1.f32 %c1dno : f32
     %c2 = vm.const.f32 6.38905609893065
     vm.check.eq %v, %c2, "expm1(2.0)=6.38905609893065" : f32
@@ -189,7 +189,7 @@
   vm.export @test_log_f32
   vm.func @test_log_f32() {
     %c1 = vm.const.f32 10.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.log.f32 %c1dno : f32
     %c2 = vm.const.f32 2.302585092994046
     vm.check.eq %v, %c2, "log(10.0)=2.302585092994046" : f32
@@ -199,7 +199,7 @@
   vm.export @test_log10_f32
   vm.func @test_log10_f32() {
     %c1 = vm.const.f32 10.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.log10.f32 %c1dno : f32
     %c2 = vm.const.f32 1.0
     vm.check.eq %v, %c2, "log10(10.0)=1.0" : f32
@@ -209,7 +209,7 @@
   vm.export @test_log1p_f32
   vm.func @test_log1p_f32() {
     %c1 = vm.const.f32 10.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.log1p.f32 %c1dno : f32
     %c2 = vm.const.f32 2.3978952727983707
     vm.check.eq %v, %c2, "log1p(10.0)=2.3978952727983707" : f32
@@ -219,7 +219,7 @@
   vm.export @test_log2_f32
   vm.func @test_log2_f32() {
     %c1 = vm.const.f32 10.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.log2.f32 %c1dno : f32
     %c2 = vm.const.f32 3.321928094887362
     vm.check.eq %v, %c2, "log2(10.0)=3.321928094887362" : f32
@@ -229,9 +229,9 @@
   vm.export @test_pow_f32
   vm.func @test_pow_f32() {
     %c1 = vm.const.f32 3.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %c2 = vm.const.f32 2.0
-    %c2dno = util.do_not_optimize(%c2) : f32
+    %c2dno = util.optimization_barrier %c2 : f32
     %v = vm.pow.f32 %c1dno, %c2dno : f32
     %c3 = vm.const.f32 9.0
     vm.check.eq %v, %c3, "pow(3.0,2.0)=9.0" : f32
@@ -241,7 +241,7 @@
   vm.export @test_rsqrt_f32
   vm.func @test_rsqrt_f32() {
     %c1 = vm.const.f32 4.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.rsqrt.f32 %c1dno : f32
     %c2 = vm.const.f32 0.5
     vm.check.eq %v, %c2, "rsqrt(4.0)=0.5" : f32
@@ -251,7 +251,7 @@
   vm.export @test_sqrt_f32
   vm.func @test_sqrt_f32() {
     %c1 = vm.const.f32 4.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.sqrt.f32 %c1dno : f32
     %c2 = vm.const.f32 2.0
     vm.check.eq %v, %c2, "sqrt(4.0)=2.0" : f32
@@ -261,7 +261,7 @@
   vm.export @test_tanh_f32
   vm.func @test_tanh_f32() {
     %c1 = vm.const.f32 0.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.tanh.f32 %c1dno : f32
     %c2 = vm.const.f32 0.46211715726000974
     vm.check.eq %v, %c2, "tanh(0.5)=0.46211715726000974" : f32
@@ -272,7 +272,7 @@
   // vm.export @test_erf_f32
   // vm.func @test_erf_f32() {
   //   %c1 = vm.const.f32 0.5
-  //   %c1dno = util.do_not_optimize(%c1) : f32
+  //   %c1dno = util.optimization_barrier %c1 : f32
   //   %v = vm.erf.f32 %c1dno : f32
   //   %c2 = vm.const.f32 0.520499945
   //   vm.check.eq %v, %c2, "erf(0.5)=0.520499945" : f32
diff --git a/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir b/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir
index f124575..fd75c60 100644
--- a/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir
+++ b/runtime/src/iree/vm/test/arithmetic_ops_i64.mlir
@@ -7,7 +7,7 @@
   vm.export @test_add_i64
   vm.func @test_add_i64() {
     %c1 = vm.const.i64 1
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %v = vm.add.i64 %c1dno, %c1dno : i64
     %c2 = vm.const.i64 2
     vm.check.eq %v, %c2, "1+1=2" : i64
@@ -17,9 +17,9 @@
   vm.export @test_sub_i64
   vm.func @test_sub_i64() {
     %c1 = vm.const.i64 3
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.sub.i64 %c1dno, %c2dno : i64
     %c3 = vm.const.i64 1
     vm.check.eq %v, %c3, "3-2=1" : i64
@@ -29,7 +29,7 @@
   vm.export @test_mul_i64
   vm.func @test_mul_i64() {
     %c1 = vm.const.i64 2
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %v = vm.mul.i64 %c1dno, %c1dno : i64
     %c2 = vm.const.i64 4
     vm.check.eq %v, %c2, "2*2=4" : i64
@@ -39,9 +39,9 @@
   vm.export @test_div_i64s
   vm.func @test_div_i64s() {
     %c1 = vm.const.i64 4
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 -2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.div.i64.s %c1dno, %c2dno : i64
     %c3 = vm.const.i64 -2
     vm.check.eq %v, %c3, "4/-2=-2" : i64
@@ -51,9 +51,9 @@
   vm.export @test_div_i64u
   vm.func @test_div_i64u() {
     %c1 = vm.const.i64 4
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.div.i64.u %c1dno, %c2dno : i64
     %c3 = vm.const.i64 2
     vm.check.eq %v, %c3, "4/2=2" : i64
@@ -63,9 +63,9 @@
   vm.export @test_rem_i64s
   vm.func @test_rem_i64s() {
     %c1 = vm.const.i64 -3
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 -2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.rem.i64.s %c1dno, %c2dno : i64
     %c3 = vm.const.i64 -1
     vm.check.eq %v, %c3, "-3%-2=-1" : i64
@@ -75,9 +75,9 @@
   vm.export @test_rem_i64u
   vm.func @test_rem_i64u() {
     %c1 = vm.const.i64 3
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.rem.i64.u %c1dno, %c2dno : i64
     %c3 = vm.const.i64 1
     vm.check.eq %v, %c3, "3%2=1" : i64
@@ -87,11 +87,11 @@
   vm.export @test_fma_i64
   vm.func @test_fma_i64() {
     %c2 = vm.const.i64 2
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %c3 = vm.const.i64 3
-    %c3dno = util.do_not_optimize(%c3) : i64
+    %c3dno = util.optimization_barrier %c3 : i64
     %c5 = vm.const.i64 5
-    %c5dno = util.do_not_optimize(%c5) : i64
+    %c5dno = util.optimization_barrier %c5 : i64
     %v = vm.fma.i64 %c2dno, %c3dno, %c5dno : i64
     %c11 = vm.const.i64 11
     vm.check.eq %v, %c11, "2*3+5=11" : i64
@@ -101,7 +101,7 @@
   vm.export @test_abs_i64
   vm.func @test_abs_i64() {
     %c1 = vm.const.i64 -1
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %v = vm.abs.i64 %c1dno : i64
     %c2 = vm.const.i64 1
     vm.check.eq %v, %c2, "abs(-1)=1" : i64
@@ -111,7 +111,7 @@
   vm.export @test_not_i64
   vm.func @test_not_i64() {
     %c1 = vm.const.i64 0
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %v = vm.not.i64 %c1dno : i64
     %c2 = vm.const.i64 -1
     vm.check.eq %v, %c2, "~0=-1" : i64
@@ -121,9 +121,9 @@
   vm.export @test_and_i64
   vm.func @test_and_i64() {
     %c1 = vm.const.i64 5
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 3
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.and.i64 %c1dno, %c2dno : i64
     %c3 = vm.const.i64 1
     vm.check.eq %v, %c3, "5&3=1" : i64
@@ -133,9 +133,9 @@
   vm.export @test_or_i64
   vm.func @test_or_i64() {
     %c1 = vm.const.i64 5
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 3
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.or.i64 %c1dno, %c2dno : i64
     %c3 = vm.const.i64 7
     vm.check.eq %v, %c3, "5|3=7" : i64
@@ -145,9 +145,9 @@
   vm.export @test_xor_i64
   vm.func @test_xor_i64() {
     %c1 = vm.const.i64 5
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %c2 = vm.const.i64 3
-    %c2dno = util.do_not_optimize(%c2) : i64
+    %c2dno = util.optimization_barrier %c2 : i64
     %v = vm.xor.i64 %c1dno, %c2dno : i64
     %c3 = vm.const.i64 6
     vm.check.eq %v, %c3, "5^3=6" : i64
@@ -157,7 +157,7 @@
   vm.export @test_ctlz_i64_const_zero
   vm.func @test_ctlz_i64_const_zero() {
     %c = vm.const.i64 0
-    %cdno = util.do_not_optimize(%c) : i64
+    %cdno = util.optimization_barrier %c : i64
     %actual = vm.ctlz.i64 %cdno : i64
     %expected = vm.const.i64 64
     vm.check.eq %actual, %expected, "ctlz(0)=64" : i64
@@ -167,7 +167,7 @@
   vm.export @test_ctlz_i64_const_1
   vm.func @test_ctlz_i64_const_1() {
     %c = vm.const.i64 1
-    %cdno = util.do_not_optimize(%c) : i64
+    %cdno = util.optimization_barrier %c : i64
     %actual = vm.ctlz.i64 %cdno : i64
     %expected = vm.const.i64 63
     vm.check.eq %actual, %expected, "ctlz(1)=63" : i64
@@ -177,7 +177,7 @@
   vm.export @test_ctlz_i64_const_ffffffffffffffff
   vm.func @test_ctlz_i64_const_ffffffffffffffff() {
     %c = vm.const.i64 0xFFFFFFFFFFFFFFFF
-    %cdno = util.do_not_optimize(%c) : i64
+    %cdno = util.optimization_barrier %c : i64
     %actual = vm.ctlz.i64 %cdno : i64
     %expected = vm.const.i64 0
     vm.check.eq %actual, %expected, "ctlz(0xFFFFFFFFFFFFFFFF)=0" : i64
diff --git a/runtime/src/iree/vm/test/assignment_ops.mlir b/runtime/src/iree/vm/test/assignment_ops.mlir
index a5b77c7..e00e712 100644
--- a/runtime/src/iree/vm/test/assignment_ops.mlir
+++ b/runtime/src/iree/vm/test/assignment_ops.mlir
@@ -7,9 +7,9 @@
   vm.export @test_select_i32
   vm.func @test_select_i32() {
     %c0 = vm.const.i32 0
-    %c0dno = util.do_not_optimize(%c0) : i32
+    %c0dno = util.optimization_barrier %c0 : i32
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v1 = vm.select.i32 %c0dno, %c0dno, %c1dno : i32
     vm.check.eq %v1, %c1, "0 ? 0 : 1 = 1" : i32
     %v2 = vm.select.i32 %c1dno, %c0dno, %c1dno : i32
@@ -24,7 +24,7 @@
     %c1 = vm.const.i32 1
     %list1 = vm.list.alloc %c1 : (i32) -> !vm.list<i8>
     %cond = vm.const.i32 0
-    %cond_dno = util.do_not_optimize(%cond) : i32
+    %cond_dno = util.optimization_barrier %cond : i32
     %list = vm.select.ref %cond_dno, %list0, %list1 : !vm.list<i8>
     vm.check.eq %list, %list1, "0 ? list0 : list1 = list1" : !vm.list<i8>
     vm.return
diff --git a/runtime/src/iree/vm/test/assignment_ops_f32.mlir b/runtime/src/iree/vm/test/assignment_ops_f32.mlir
index 1a88bd0..825d08a 100644
--- a/runtime/src/iree/vm/test/assignment_ops_f32.mlir
+++ b/runtime/src/iree/vm/test/assignment_ops_f32.mlir
@@ -7,9 +7,9 @@
   vm.export @test_select_f32
   vm.func @test_select_f32() {
     %c0 = vm.const.i32 0
-    %c0dno = util.do_not_optimize(%c0) : i32
+    %c0dno = util.optimization_barrier %c0 : i32
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.f32 0.0
     %c3 = vm.const.f32 1.0
     %v1 = vm.select.f32 %c0dno, %c2, %c3 : f32
diff --git a/runtime/src/iree/vm/test/assignment_ops_i64.mlir b/runtime/src/iree/vm/test/assignment_ops_i64.mlir
index 72429f3..04f68ad 100644
--- a/runtime/src/iree/vm/test/assignment_ops_i64.mlir
+++ b/runtime/src/iree/vm/test/assignment_ops_i64.mlir
@@ -7,9 +7,9 @@
   vm.export @test_select_i64
   vm.func @test_select_i64() {
     %c0 = vm.const.i32 0
-    %c0dno = util.do_not_optimize(%c0) : i32
+    %c0dno = util.optimization_barrier %c0 : i32
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i64 0
     %c3 = vm.const.i64 1
     %v1 = vm.select.i64 %c0dno, %c2, %c3 : i64
diff --git a/runtime/src/iree/vm/test/async_ops.mlir b/runtime/src/iree/vm/test/async_ops.mlir
index 2b9fb86..fd3aa65 100644
--- a/runtime/src/iree/vm/test/async_ops.mlir
+++ b/runtime/src/iree/vm/test/async_ops.mlir
@@ -18,15 +18,15 @@
   vm.func @yield_sequence(%arg0: i32) -> i32 {
     %c1 = vm.const.i32 1
     %y0 = vm.add.i32 %arg0, %c1 : i32
-    %y0_dno = util.do_not_optimize(%y0) : i32
+    %y0_dno = util.optimization_barrier %y0 : i32
     vm.yield ^bb1
   ^bb1:
     %y1 = vm.add.i32 %y0_dno, %c1 : i32
-    %y1_dno = util.do_not_optimize(%y1) : i32
+    %y1_dno = util.optimization_barrier %y1 : i32
     vm.yield ^bb2
   ^bb2:
     %y2 = vm.add.i32 %y1_dno, %c1 : i32
-    %y2_dno = util.do_not_optimize(%y2) : i32
+    %y2_dno = util.optimization_barrier %y2 : i32
     vm.yield ^bb3
   ^bb3:
     vm.return %y2_dno : i32
@@ -41,10 +41,10 @@
     %cond = vm.cmp.nz.i32 %arg0 : i32
     vm.cond_br %cond, ^true, ^false
   ^true:
-    %arg1_dno = util.do_not_optimize(%arg1) : i32
+    %arg1_dno = util.optimization_barrier %arg1 : i32
     vm.yield ^bb3(%arg1_dno : i32)
   ^false:
-    %arg2_dno = util.do_not_optimize(%arg2) : i32
+    %arg2_dno = util.optimization_barrier %arg2 : i32
     vm.yield ^bb3(%arg2_dno: i32)
   ^bb3(%result : i32):
     vm.return %result : i32
diff --git a/runtime/src/iree/vm/test/buffer_ops.mlir b/runtime/src/iree/vm/test/buffer_ops.mlir
index ba6aab1..a76ed5b 100644
--- a/runtime/src/iree/vm/test/buffer_ops.mlir
+++ b/runtime/src/iree/vm/test/buffer_ops.mlir
@@ -16,8 +16,8 @@
   vm.func private @test_compare() {
     %rodata_a = vm.const.ref.rodata @rodata_cmp_3xi32_a : !vm.buffer
     %rodata_b = vm.const.ref.rodata @rodata_cmp_3xi32_b : !vm.buffer
-    %rodata_a_dno = util.do_not_optimize(%rodata_a) : !vm.buffer
-    %rodata_b_dno = util.do_not_optimize(%rodata_b) : !vm.buffer
+    %rodata_a_dno = util.optimization_barrier %rodata_a : !vm.buffer
+    %rodata_b_dno = util.optimization_barrier %rodata_b : !vm.buffer
 
     %c0 = vm.const.i64 0
     %length = vm.buffer.length %rodata_a_dno : !vm.buffer -> i64
@@ -37,8 +37,8 @@
   vm.func private @test_compare_empty() {
     %rodata_a = vm.const.ref.rodata @rodata_cmp_3xi32_a : !vm.buffer
     %rodata_b = vm.const.ref.rodata @rodata_cmp_3xi32_b : !vm.buffer
-    %rodata_a_dno = util.do_not_optimize(%rodata_a) : !vm.buffer
-    %rodata_b_dno = util.do_not_optimize(%rodata_b) : !vm.buffer
+    %rodata_a_dno = util.optimization_barrier %rodata_a : !vm.buffer
+    %rodata_b_dno = util.optimization_barrier %rodata_b : !vm.buffer
 
     %c0 = vm.const.i64 0
     %c2 = vm.const.i64 2
@@ -58,7 +58,7 @@
   vm.func private @test_alloc() {
     %c128 = vm.const.i64 128
     %buf = vm.buffer.alloc %c128 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     %buf_length = vm.buffer.length %buf_dno : !vm.buffer -> i64
@@ -72,7 +72,7 @@
   vm.func private @test_alloc_empty() {
     %c0 = vm.const.i64 0
     %buf = vm.buffer.alloc %c0 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     %buf_length = vm.buffer.length %buf_dno : !vm.buffer -> i64
@@ -95,7 +95,7 @@
     %c4 = vm.const.i64 4
     %c8 = vm.const.i64 8
     %buf = vm.buffer.clone %rodata, %c4, %c8 : !vm.buffer -> !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Compare the cloned range to the original.
@@ -112,14 +112,14 @@
     // Allocate source zero-length buffer.
     %c0 = vm.const.i64 0
     %buf0 = vm.buffer.alloc %c0 : !vm.buffer
-    %buf0_dno = util.do_not_optimize(%buf0) : !vm.buffer
+    %buf0_dno = util.optimization_barrier %buf0 : !vm.buffer
     vm.check.nz %buf0_dno, "!null" : !vm.buffer
     %buf0_length = vm.buffer.length %buf0_dno : !vm.buffer -> i64
     vm.check.eq %c0, %buf0_length, "buffer length == 0" : i64
 
     // Clone it all (or, clone nothing?).
     %buf1 = vm.buffer.clone %buf0_dno, %c0, %c0 : !vm.buffer -> !vm.buffer
-    %buf1_dno = util.do_not_optimize(%buf1) : !vm.buffer
+    %buf1_dno = util.optimization_barrier %buf1 : !vm.buffer
     vm.check.nz %buf1_dno, "!null" : !vm.buffer
     %buf1_length = vm.buffer.length %buf1_dno : !vm.buffer -> i64
     vm.check.eq %c0, %buf1_length, "buffer length == 0" : i64
@@ -132,7 +132,7 @@
   vm.func private @fail_clone_out_of_range() {
     // Fetch source .rodata blob.
     %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
-    %rodata_dno = util.do_not_optimize(%rodata) : !vm.buffer
+    %rodata_dno = util.optimization_barrier %rodata : !vm.buffer
     vm.check.nz %rodata_dno, "!null" : !vm.buffer
 
     // Try to clone off the end of the buffer.
@@ -156,7 +156,7 @@
 
     // Allocate target buffer.
     %buf = vm.buffer.alloc %rodata_length : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Copy the entire contents.
@@ -178,7 +178,7 @@
     // Allocate target buffer.
     %c4 = vm.const.i64 4
     %buf = vm.buffer.alloc %c4 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Copy the middle 4-byte element.
@@ -200,7 +200,7 @@
     %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
     %c128 = vm.const.i64 128
     %buf = vm.buffer.alloc %c128 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Try to clone off the end of the source buffer.
@@ -216,7 +216,7 @@
     %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
     %c128 = vm.const.i64 128
     %buf = vm.buffer.alloc %c128 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Try to clone off the end of the source buffer.
@@ -234,7 +234,7 @@
     %rodata_length = vm.buffer.length %rodata : !vm.buffer -> i64
     %c8 = vm.const.i64 8
     %buf = vm.buffer.alloc %c8 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Try to clone off the end of the target buffer.
@@ -250,7 +250,7 @@
     %rodata = vm.const.ref.rodata @rodata_3xi32 : !vm.buffer
     %c8 = vm.const.i64 8
     %buf = vm.buffer.alloc %c8 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Try to clone off the end of the target buffer.
@@ -272,7 +272,7 @@
     // Allocate zeroed buffer.
     %c8 = vm.const.i64 8
     %buf = vm.buffer.alloc %c8 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
     vm.check.nz %buf_dno, "!null" : !vm.buffer
 
     // Fill the middle two elements.
@@ -298,7 +298,7 @@
     // Allocate zeroed buffer.
     %c8 = vm.const.i64 8
     %buf = vm.buffer.alloc %c8 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
 
     // Try filling from offset 1, which is not i16-aligned.
     %c1 = vm.const.i64 1
@@ -324,7 +324,7 @@
     // Allocate zeroed buffer.
     %c8 = vm.const.i64 8
     %buf = vm.buffer.alloc %c8 : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
 
     // Try filling for length 1, which is not i16-aligned.
     %c0 = vm.const.i64 0
@@ -503,11 +503,11 @@
   vm.export @test_store_i8 attributes {emitc.exclude}
   vm.func private @test_store_i8() {
     %ref = vm.const.ref.rodata @test_store_i8_ref : !vm.buffer
-    %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+    %ref_dno = util.optimization_barrier %ref : !vm.buffer
     %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i64
 
     %buf = vm.buffer.alloc %ref_length : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
 
     %c0 = vm.const.i64 0
     %e0 = vm.const.i32 0
@@ -536,11 +536,11 @@
   vm.export @test_store_i16 attributes {emitc.exclude}
   vm.func private @test_store_i16() {
     %ref = vm.const.ref.rodata @test_store_i16_ref : !vm.buffer
-    %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+    %ref_dno = util.optimization_barrier %ref : !vm.buffer
     %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i64
 
     %buf = vm.buffer.alloc %ref_length : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
 
     %c0 = vm.const.i64 0
     %e0 = vm.const.i32 0
@@ -569,11 +569,11 @@
   vm.export @test_store_i32 attributes {emitc.exclude}
   vm.func private @test_store_i32() {
     %ref = vm.const.ref.rodata @test_store_i32_ref : !vm.buffer
-    %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+    %ref_dno = util.optimization_barrier %ref : !vm.buffer
     %ref_length = vm.buffer.length %ref_dno : !vm.buffer -> i64
 
     %buf = vm.buffer.alloc %ref_length : !vm.buffer
-    %buf_dno = util.do_not_optimize(%buf) : !vm.buffer
+    %buf_dno = util.optimization_barrier %buf : !vm.buffer
 
     %c0 = vm.const.i64 0
     %e0 = vm.const.i32 0
diff --git a/runtime/src/iree/vm/test/call_ops.mlir b/runtime/src/iree/vm/test/call_ops.mlir
index 6f5072d..420fd46 100644
--- a/runtime/src/iree/vm/test/call_ops.mlir
+++ b/runtime/src/iree/vm/test/call_ops.mlir
@@ -36,16 +36,16 @@
   // ordinal allocation and vm to EmitC conversion to prevent constant folding
  // of the tests during the latter. This means we would need to add a pattern
   // that inserts calls to `iree_vm_ref_retain` for operand/result pairs of the
-  // do_not_optimize op.
+  // barrier op.
   // TODO(simon-camp): Enable the test for emitc.
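  // (Illustrative sketch: a barrier such as
  //   %r = util.optimization_barrier %ref : !vm.buffer
  // forwards %ref to %r unchanged, so the EmitC path would need a retain
  // between each such operand/result pair to keep the ref live.)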
   vm.export @test_call_r_v_preserve_ref attributes {emitc.exclude}
   vm.func private @test_call_r_v_preserve_ref() {
     %ref = vm.const.ref.zero : !vm.buffer
     %unused = vm.const.ref.rodata @buffer : !vm.buffer
-    %unusued_dno_1 = util.do_not_optimize(%unused) : !vm.buffer
+    %unusued_dno_1 = util.optimization_barrier %unused : !vm.buffer
     vm.check.nz %unused : !vm.buffer
     vm.call @_r_v_preserve_reg(%ref, %unused) : (!vm.buffer, !vm.buffer) -> ()
-    %unusued_dno_2 = util.do_not_optimize(%unused) : !vm.buffer
+    %unusued_dno_2 = util.optimization_barrier %unused : !vm.buffer
     vm.check.nz %unusued_dno_2 : !vm.buffer
     vm.return
   }
@@ -61,7 +61,7 @@
   vm.export @test_call_v_r
   vm.func @test_call_v_r() {
     %ref = vm.const.ref.zero : !vm.ref<?>
-    %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+    %ref_dno = util.optimization_barrier %ref : !vm.ref<?>
     %res = vm.call @_v_r() : () -> (!vm.ref<?>)
     vm.check.eq %ref_dno, %res, "_v_r()=NULL" : !vm.ref<?>
     vm.return
@@ -91,21 +91,21 @@
 
   vm.func @_r_v(%arg : !vm.ref<?>) attributes {noinline} {
     %ref = vm.const.ref.zero : !vm.ref<?>
-    %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+    %ref_dno = util.optimization_barrier %ref : !vm.ref<?>
     vm.check.eq %arg, %ref_dno, "Expected %arg to be NULL" : !vm.ref<?>
     vm.return
   }
 
   vm.func @_r_v_reuse_reg(%arg : !vm.ref<?>, %unused : !vm.ref<?>) attributes {noinline} {
     %ref = vm.const.ref.zero : !vm.ref<?>
-    %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+    %ref_dno = util.optimization_barrier %ref : !vm.ref<?>
     vm.check.eq %arg, %ref_dno, "Expected %arg to be NULL" : !vm.ref<?>
     vm.return
   }
 
   vm.func @_r_v_preserve_reg(%arg1 : !vm.ref<?>, %arg2 : !vm.ref<?>) attributes {noinline} {
     %ref = vm.const.ref.zero : !vm.ref<?>
-    %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+    %ref_dno = util.optimization_barrier %ref : !vm.ref<?>
     vm.check.eq %arg1, %ref_dno, "Expected %arg1 to be NULL" : !vm.ref<?>
     vm.check.nz %arg2, "Expected %arg2 to be not NULL" : !vm.ref<?>
     vm.return
diff --git a/runtime/src/iree/vm/test/comparison_ops.mlir b/runtime/src/iree/vm/test/comparison_ops.mlir
index 56067b8..f009545 100644
--- a/runtime/src/iree/vm/test/comparison_ops.mlir
+++ b/runtime/src/iree/vm/test/comparison_ops.mlir
@@ -7,9 +7,9 @@
   vm.export @test_cmp_lt_s_0
   vm.func @test_cmp_lt_s_0() {
     %lhs = vm.const.i32 2
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 -2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
     %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "2 < -2" : i32
@@ -19,9 +19,9 @@
   vm.export @test_cmp_lt_s_1
   vm.func @test_cmp_lt_s_1() {
     %lhs = vm.const.i32 -2
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
     %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "-2 < 2" : i32
@@ -32,9 +32,9 @@
   vm.export @test_cmp_lt_s_2
   vm.func @test_cmp_lt_s_2() {
     %lhs = vm.const.i32 4294967295
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
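    // 4294967295 is the i32 bit pattern of -1, so the signed compare sees -1 < 2.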
     %actual = vm.cmp.lt.i32.s %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2" : i32
@@ -48,9 +48,9 @@
   vm.export @test_cmp_lt_u_0
   vm.func @test_cmp_lt_u_0() {
     %lhs = vm.const.i32 2
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 -2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
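    // -2 wraps to 0xFFFFFFFE as unsigned, so the compare sees 2 < 4294967294.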
     %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "2 < -2 (as unsigned)" : i32
@@ -60,9 +60,9 @@
   vm.export @test_cmp_lt_u_1
   vm.func @test_cmp_lt_u_1() {
     %lhs = vm.const.i32 -2
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
     %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "-2 < 2 (as unsigned)" : i32
@@ -72,9 +72,9 @@
   vm.export @test_cmp_lt_u_2
   vm.func @test_cmp_lt_u_2() {
     %lhs = vm.const.i32 4294967295
-    %lhs_dno = util.do_not_optimize(%lhs) : i32
+    %lhs_dno = util.optimization_barrier %lhs : i32
     %rhs = vm.const.i32 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i32
+    %rhs_dno = util.optimization_barrier %rhs : i32
     %actual = vm.cmp.lt.i32.u %lhs_dno, %rhs_dno : i32
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < 2 (as unsigned)" : i32
@@ -94,9 +94,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i32 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i32
+    %cn2_dno = util.optimization_barrier %cn2 : i32
     %c2 = vm.const.i32 2
-    %c2_dno = util.do_not_optimize(%c2) : i32
+    %c2_dno = util.optimization_barrier %c2 : i32
 
     %cmp_0 = vm.cmp.lte.i32.s %cn2_dno, %c2_dno : i32
     vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
@@ -121,9 +121,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i32 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i32
+    %cn2_dno = util.optimization_barrier %cn2 : i32
     %c2 = vm.const.i32 2
-    %c2_dno = util.do_not_optimize(%c2) : i32
+    %c2_dno = util.optimization_barrier %c2 : i32
 
     %cmp_0 = vm.cmp.gt.i32.s %cn2_dno, %c2_dno : i32
     vm.check.eq %cmp_0, %false, "-2 > 2" : i32
@@ -148,9 +148,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i32 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i32
+    %cn2_dno = util.optimization_barrier %cn2 : i32
     %c2 = vm.const.i32 2
-    %c2_dno = util.do_not_optimize(%c2) : i32
+    %c2_dno = util.optimization_barrier %c2 : i32
 
     %cmp_0 = vm.cmp.gte.i32.s %cn2_dno, %c2_dno : i32
     vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
diff --git a/runtime/src/iree/vm/test/comparison_ops_f32.mlir b/runtime/src/iree/vm/test/comparison_ops_f32.mlir
index 69487cc..363a02e 100644
--- a/runtime/src/iree/vm/test/comparison_ops_f32.mlir
+++ b/runtime/src/iree/vm/test/comparison_ops_f32.mlir
@@ -7,9 +7,9 @@
   vm.export @test_cmp_lt_0_f32
   vm.func @test_cmp_lt_0_f32() {
     %lhs = vm.const.f32 4.0
-    %lhs_dno = util.do_not_optimize(%lhs) : f32
+    %lhs_dno = util.optimization_barrier %lhs : f32
     %rhs = vm.const.f32 -4.0
-    %rhs_dno = util.do_not_optimize(%rhs) : f32
+    %rhs_dno = util.optimization_barrier %rhs : f32
     %actual = vm.cmp.lt.f32.o %lhs_dno, %rhs_dno : f32
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "4.0 < -4.0" : i32
@@ -19,9 +19,9 @@
   vm.export @test_cmp_lt_1_f32
   vm.func @test_cmp_lt_1_f32() {
     %lhs = vm.const.f32 -4.0
-    %lhs_dno = util.do_not_optimize(%lhs) : f32
+    %lhs_dno = util.optimization_barrier %lhs : f32
     %rhs = vm.const.f32 4.0
-    %rhs_dno = util.do_not_optimize(%rhs) : f32
+    %rhs_dno = util.optimization_barrier %rhs : f32
     %actual = vm.cmp.lt.f32.o %lhs_dno, %rhs_dno : f32
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "-4.0 < 4.0" : i32
@@ -41,9 +41,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.f32 -2.0
-    %cn2_dno = util.do_not_optimize(%cn2) : f32
+    %cn2_dno = util.optimization_barrier %cn2 : f32
     %c2 = vm.const.f32 2.0
-    %c2_dno = util.do_not_optimize(%c2) : f32
+    %c2_dno = util.optimization_barrier %c2 : f32
 
     %cmp_0 = vm.cmp.eq.f32.near %cn2_dno, %c2_dno : f32
     vm.check.eq %cmp_0, %false, "-2 !~ 2" : i32
@@ -56,9 +56,9 @@
 
    // Off by 84 ULPs; the arbitrary threshold treats these as "near enough".
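    // (f32 bit patterns 0x3F8000A8 vs 0x3F8000FC, which differ by 0x54 = 84)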
     %c1a = vm.const.f32 1.00002
-    %c1a_dno = util.do_not_optimize(%c1a) : f32
+    %c1a_dno = util.optimization_barrier %c1a : f32
     %c1b = vm.const.f32 1.00003
-    %c1b_dno = util.do_not_optimize(%c1b) : f32
+    %c1b_dno = util.optimization_barrier %c1b : f32
 
     %cmp_4 = vm.cmp.eq.f32.near %c1a_dno, %c1b_dno : f32
     vm.check.eq %cmp_4, %true, "1.00002 ~ 1.00003" : i32
@@ -74,9 +74,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.f32 -2.0
-    %cn2_dno = util.do_not_optimize(%cn2) : f32
+    %cn2_dno = util.optimization_barrier %cn2 : f32
     %c2 = vm.const.f32 2.0
-    %c2_dno = util.do_not_optimize(%c2) : f32
+    %c2_dno = util.optimization_barrier %c2 : f32
 
     %cmp_0 = vm.cmp.lte.f32.o %cn2_dno, %c2_dno : f32
     vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
@@ -94,9 +94,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.f32 -2.0
-    %cn2_dno = util.do_not_optimize(%cn2) : f32
+    %cn2_dno = util.optimization_barrier %cn2 : f32
     %c2 = vm.const.f32 2.0
-    %c2_dno = util.do_not_optimize(%c2) : f32
+    %c2_dno = util.optimization_barrier %c2 : f32
 
     %cmp_0 = vm.cmp.gt.f32.o %cn2_dno, %c2_dno : f32
     vm.check.eq %cmp_0, %false, "-2 > 2" : i32
@@ -114,9 +114,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.f32 -2.0
-    %cn2_dno = util.do_not_optimize(%cn2) : f32
+    %cn2_dno = util.optimization_barrier %cn2 : f32
     %c2 = vm.const.f32 2.0
-    %c2_dno = util.do_not_optimize(%c2) : f32
+    %c2_dno = util.optimization_barrier %c2 : f32
 
     %cmp_0 = vm.cmp.gte.f32.o %cn2_dno, %c2_dno : f32
     vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
diff --git a/runtime/src/iree/vm/test/comparison_ops_i64.mlir b/runtime/src/iree/vm/test/comparison_ops_i64.mlir
index 2e1bd76..3c10ef8 100644
--- a/runtime/src/iree/vm/test/comparison_ops_i64.mlir
+++ b/runtime/src/iree/vm/test/comparison_ops_i64.mlir
@@ -7,9 +7,9 @@
   vm.export @test_cmp_lt_s_0_i64
   vm.func @test_cmp_lt_s_0_i64() {
     %lhs = vm.const.i64 4294967295
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 -4294967295
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
     %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "4294967295 (UINT_MAX) < -4294967295 (UINT_MAX)" : i32
@@ -19,9 +19,9 @@
   vm.export @test_cmp_lt_s_1_i64
   vm.func @test_cmp_lt_s_1_i64() {
     %lhs = vm.const.i64 -4294967295
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 4294967295
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
     %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "-4294967295 (UINT_MAX) < 4294967295 (UINT_MAX)" : i32
@@ -32,9 +32,9 @@
   vm.export @test_cmp_lt_s_2_i64
   vm.func @test_cmp_lt_s_2_i64() {
     %lhs = vm.const.i64 18446744073709551615
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
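    // 18446744073709551615 is the i64 bit pattern of -1, so the signed compare sees -1 < 2.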
     %actual = vm.cmp.lt.i64.s %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "18446744073709551615 (ULONG_MAX) < 2" : i32
@@ -48,9 +48,9 @@
   vm.export @test_cmp_lt_u_0_i64
   vm.func @test_cmp_lt_u_0_i64() {
     %lhs = vm.const.i64 2
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 -2
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
     %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 1
     vm.check.eq %actual, %expected, "2 < -2 (as unsigned)" : i32
@@ -60,9 +60,9 @@
   vm.export @test_cmp_lt_u_1_i64
   vm.func @test_cmp_lt_u_1_i64() {
     %lhs = vm.const.i64 -2
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
     %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "-2 < 2 (as unsigned)" : i32
@@ -72,9 +72,9 @@
   vm.export @test_cmp_lt_u_2_i64
   vm.func @test_cmp_lt_u_2_i64() {
     %lhs = vm.const.i64 18446744073709551615
-    %lhs_dno = util.do_not_optimize(%lhs) : i64
+    %lhs_dno = util.optimization_barrier %lhs : i64
     %rhs = vm.const.i64 2
-    %rhs_dno = util.do_not_optimize(%rhs) : i64
+    %rhs_dno = util.optimization_barrier %rhs : i64
     %actual = vm.cmp.lt.i64.u %lhs_dno, %rhs_dno : i64
     %expected = vm.const.i32 0
     vm.check.eq %actual, %expected, "18446744073709551615 (ULONG_MAX) < 2 (as unsigned)" : i32
@@ -94,9 +94,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i64 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i64
+    %cn2_dno = util.optimization_barrier %cn2 : i64
     %c2 = vm.const.i64 2
-    %c2_dno = util.do_not_optimize(%c2) : i64
+    %c2_dno = util.optimization_barrier %c2 : i64
 
     %cmp_0 = vm.cmp.lte.i64.s %cn2_dno, %c2_dno : i64
     vm.check.eq %cmp_0, %true, "-2 <= 2" : i32
@@ -121,9 +121,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i64 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i64
+    %cn2_dno = util.optimization_barrier %cn2 : i64
     %c2 = vm.const.i64 2
-    %c2_dno = util.do_not_optimize(%c2) : i64
+    %c2_dno = util.optimization_barrier %c2 : i64
 
     %cmp_0 = vm.cmp.gt.i64.s %cn2_dno, %c2_dno : i64
     vm.check.eq %cmp_0, %false, "-2 > 2" : i32
@@ -148,9 +148,9 @@
     %false = vm.const.i32 0
 
     %cn2 = vm.const.i64 -2
-    %cn2_dno = util.do_not_optimize(%cn2) : i64
+    %cn2_dno = util.optimization_barrier %cn2 : i64
     %c2 = vm.const.i64 2
-    %c2_dno = util.do_not_optimize(%c2) : i64
+    %c2_dno = util.optimization_barrier %c2 : i64
 
     %cmp_0 = vm.cmp.gte.i64.s %cn2_dno, %c2_dno : i64
     vm.check.eq %cmp_0, %false, "-2 >= 2" : i32
diff --git a/runtime/src/iree/vm/test/control_flow_ops.mlir b/runtime/src/iree/vm/test/control_flow_ops.mlir
index cd1acab..7a54ea1 100644
--- a/runtime/src/iree/vm/test/control_flow_ops.mlir
+++ b/runtime/src/iree/vm/test/control_flow_ops.mlir
@@ -26,7 +26,7 @@
   vm.export @test_check_eq_always
   vm.func @test_check_eq_always() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     vm.check.eq %c1, %c1dno, "error!" : i32
     vm.return
   }
@@ -35,8 +35,8 @@
   vm.func @fail_check_eq_never() {
     %c1 = vm.const.i32 1
     %c2 = vm.const.i32 2
-    %c1dno = util.do_not_optimize(%c1) : i32
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     vm.check.eq %c1dno, %c2dno, "error!" : i32
     vm.return
   }
@@ -72,7 +72,7 @@
   vm.export @test_cond_br
   vm.func @test_cond_br() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     vm.cond_br %c1dno, ^bb1, ^bb2
   ^bb1:
     vm.check.eq %c1dno, %c1dno, "error!" : i32
@@ -85,7 +85,7 @@
   vm.export @test_cond_br_int_arg
   vm.func @test_cond_br_int_arg() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     vm.cond_br %c1dno, ^bb1(%c1dno : i32), ^bb2(%c1dno : i32)
   ^bb1(%arg1 : i32):
     vm.check.eq %arg1, %c1dno, "error!" : i32
@@ -98,7 +98,7 @@
   vm.export @test_cond_br_ref_arg
   vm.func @test_cond_br_ref_arg() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %ref = vm.const.ref.zero : !vm.ref<?>
     vm.cond_br %c1dno, ^bb1(%ref : !vm.ref<?>), ^bb2(%ref : !vm.ref<?>)
   ^bb1(%arg1 : !vm.ref<?>):
@@ -115,9 +115,9 @@
   vm.export @test_cond_br_same_successor attributes {emitc.exclude}
   vm.func private @test_cond_br_same_successor() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 2
-    %c2dno = util.do_not_optimize(%c2) : i32
+    %c2dno = util.optimization_barrier %c2 : i32
     vm.cond_br %c1dno, ^bb1(%c1dno : i32), ^bb1(%c2dno : i32)
   ^bb1(%arg1 : i32):
     vm.check.eq %arg1, %c1dno, "error!" : i32
@@ -133,17 +133,17 @@
     %ref_b = vm.const.ref.rodata @buffer_b : !vm.buffer
     %ref_c = vm.const.ref.rodata @buffer_c : !vm.buffer
 
-    %res:3 = vm.call @_return_arg_cycling(%ref_a, %ref_b, %ref_c) 
+    %res:3 = vm.call @_return_arg_cycling(%ref_a, %ref_b, %ref_c)
         : (!vm.buffer, !vm.buffer, !vm.buffer) -> (!vm.buffer, !vm.buffer, !vm.buffer)
     vm.check.eq %res#0, %ref_b : !vm.buffer
     vm.check.eq %res#1, %ref_c : !vm.buffer
     vm.check.eq %res#2, %ref_a : !vm.buffer
-    
+
     vm.return
   }
 
   vm.func private @_return_arg_cycling(%arg0 : !vm.buffer, %arg1: !vm.buffer,
-                                       %arg2: !vm.buffer) 
+                                       %arg2: !vm.buffer)
       -> (!vm.buffer, !vm.buffer, !vm.buffer) attributes {noinline} {
     vm.return %arg1, %arg2, %arg0 : !vm.buffer, !vm.buffer, !vm.buffer
   }
@@ -155,20 +155,20 @@
     %ref_c = vm.const.ref.rodata @buffer_c : !vm.buffer
     %cond = vm.const.i32 0
 
-    %res:3 = vm.call @_branch_arg_cycling(%ref_a, %ref_b, %ref_c, %cond) 
+    %res:3 = vm.call @_branch_arg_cycling(%ref_a, %ref_b, %ref_c, %cond)
         : (!vm.buffer, !vm.buffer, !vm.buffer, i32) -> (!vm.buffer, !vm.buffer, !vm.buffer)
     vm.check.eq %res#0, %ref_b : !vm.buffer
     vm.check.eq %res#1, %ref_c : !vm.buffer
     vm.check.eq %res#2, %ref_a : !vm.buffer
-    
+
     vm.return
   }
 
   vm.func private @_branch_arg_cycling(%arg0 : !vm.buffer, %arg1: !vm.buffer,
-                                       %arg2: !vm.buffer, %arg3: i32) 
+                                       %arg2: !vm.buffer, %arg3: i32)
       -> (!vm.buffer, !vm.buffer, !vm.buffer) attributes {noinline} {
-    vm.cond_br %arg3, 
-               ^bb1(%arg0, %arg1, %arg2: !vm.buffer, !vm.buffer, !vm.buffer), 
+    vm.cond_br %arg3,
+               ^bb1(%arg0, %arg1, %arg2: !vm.buffer, !vm.buffer, !vm.buffer),
                ^bb2(%arg1, %arg2, %arg0, %arg3: !vm.buffer, !vm.buffer, !vm.buffer, i32)
   ^bb1(%a: !vm.buffer, %b: !vm.buffer, %c: !vm.buffer):
     vm.return %a, %b, %c : !vm.buffer, !vm.buffer, !vm.buffer
diff --git a/runtime/src/iree/vm/test/conversion_ops.mlir b/runtime/src/iree/vm/test/conversion_ops.mlir
index 799376e..dcee2ce 100644
--- a/runtime/src/iree/vm/test/conversion_ops.mlir
+++ b/runtime/src/iree/vm/test/conversion_ops.mlir
@@ -7,7 +7,7 @@
   vm.export @test_trunc_i32_i8
   vm.func @test_trunc_i32_i8() {
     %c1 = vm.const.i32 2147483647
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.trunc.i32.i8 %c1dno : i32 -> i32
     %c2 = vm.const.i32 255
     vm.check.eq %v, %c2, "truncate unsigned i32 to unsigned i8" : i32
@@ -17,7 +17,7 @@
   vm.export @test_trunc_i32_i16
   vm.func @test_trunc_i32_i16() {
     %c1 = vm.const.i32 2147483647
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.trunc.i32.i16 %c1dno : i32 -> i32
     %c2 = vm.const.i32 65535
     vm.check.eq %v, %c2, "truncate unsigned i32 to unsigned i16" : i32
diff --git a/runtime/src/iree/vm/test/conversion_ops_f32.mlir b/runtime/src/iree/vm/test/conversion_ops_f32.mlir
index 650f3a2..9dc216d 100644
--- a/runtime/src/iree/vm/test/conversion_ops_f32.mlir
+++ b/runtime/src/iree/vm/test/conversion_ops_f32.mlir
@@ -8,7 +8,7 @@
   vm.export @test_bitcast_i32_f32
   vm.func @test_bitcast_i32_f32() {
     %c1 = vm.const.i32 1085276160
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
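    // 1085276160 = 0x40B00000, the IEEE 754 bit pattern of 5.5.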
     %v = vm.bitcast.i32.f32 %c1dno : i32 -> f32
     %c2 = vm.const.f32 5.5
     vm.check.eq %v, %c2, "bitcast i32 to f32" : f32
@@ -19,7 +19,7 @@
   vm.export @test_bitcast_f32_i32
   vm.func @test_bitcast_f32_i32() {
     %c1 = vm.const.f32 5.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.bitcast.f32.i32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 1085276160
     vm.check.eq %v, %c2, "bitcast f32 to i32" : i32
@@ -29,7 +29,7 @@
   vm.export @test_cast_si32_f32_int_max
   vm.func @test_cast_si32_f32_int_max() {
     %c1 = vm.const.i32 2147483647
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.cast.si32.f32 %c1dno : i32 -> f32
     %c2 = vm.const.f32 2147483647.0
     vm.check.eq %v, %c2, "cast signed integer to a floating-point value" : f32
@@ -39,7 +39,7 @@
   vm.export @test_cast_si32_f32_int_min
   vm.func @test_cast_si32_f32_int_min() {
     %c1 = vm.const.i32 -2147483648
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.cast.si32.f32 %c1dno : i32 -> f32
     %c2 = vm.const.f32 -2147483648.0
     vm.check.eq %v, %c2, "cast signed integer to a floating-point value" : f32
@@ -49,7 +49,7 @@
   vm.export @test_cast_ui32_f32_int_max
   vm.func @test_cast_ui32_f32_int_max() {
     %c1 = vm.const.i32 4294967295
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %v = vm.cast.ui32.f32 %c1dno : i32 -> f32
     %c2 = vm.const.f32 4294967295.0
     vm.check.eq %v, %c2, "cast unsigned integer to a floating-point value" : f32
@@ -61,7 +61,7 @@
     // This is the maximum value that is representable precisely as both i32
     // and f32. An exponent of 30 with all mantissa bits set.
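    // That is (2 - 2^-23) * 2^30 = 2^31 - 2^7 = 0x7FFFFF80.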
     %c1 = vm.const.f32 0x4effffff
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cast.f32.si32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 0x7FFFFF80
     vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
@@ -71,7 +71,7 @@
   vm.export @test_cast_f32_si32_int_min
   vm.func @test_cast_f32_si32_int_min() {
     %c1 = vm.const.f32 -2147483648.0
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cast.f32.si32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 -2147483648
     vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
@@ -81,7 +81,7 @@
   vm.export @test_cast_f32_si32_away_from_zero_pos
   vm.func @test_cast_f32_si32_away_from_zero_pos() {
     %c1 = vm.const.f32 2.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
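    // The .5 tie rounds away from zero, as the test name indicates: 2.5 -> 3.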
     %v = vm.cast.f32.si32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 3
     vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
@@ -91,7 +91,7 @@
   vm.export @test_cast_f32_si32_away_from_zero_neg
   vm.func @test_cast_f32_si32_away_from_zero_neg() {
     %c1 = vm.const.f32 -2.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cast.f32.si32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 -3
     vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
@@ -103,7 +103,7 @@
     // This is the maximum value that is representable precisely as both ui32
     // and f32. An exponent of 31 with all mantissa bits set.
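    // That is (2 - 2^-23) * 2^31 = 2^32 - 2^8 = 0xFFFFFF00.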
     %c1 = vm.const.f32 0x4f7fffff
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cast.f32.ui32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 0xFFFFFF00
     vm.check.eq %v, %c2, "cast floating-point value to an unsigned integer" : i32
@@ -113,7 +113,7 @@
   vm.export @test_cast_f32_ui32_away_from_zero
   vm.func @test_cast_f32_ui32_away_from_zero() {
     %c1 = vm.const.f32 2.5
-    %c1dno = util.do_not_optimize(%c1) : f32
+    %c1dno = util.optimization_barrier %c1 : f32
     %v = vm.cast.f32.ui32 %c1dno : f32 -> i32
     %c2 = vm.const.i32 3
     vm.check.eq %v, %c2, "cast floating-point value to a signed integer" : i32
diff --git a/runtime/src/iree/vm/test/conversion_ops_i64.mlir b/runtime/src/iree/vm/test/conversion_ops_i64.mlir
index f790e5a..4ab99fa 100644
--- a/runtime/src/iree/vm/test/conversion_ops_i64.mlir
+++ b/runtime/src/iree/vm/test/conversion_ops_i64.mlir
@@ -7,7 +7,7 @@
   vm.export @test_trunc_i64_i32
   vm.func @test_trunc_i64_i32() {
     %c1 = vm.const.i64 9223372036854775807
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %v = vm.trunc.i64.i32 %c1dno : i64 -> i32
     %c2 = vm.const.i32 4294967295
     vm.check.eq %v, %c2, "truncate unsigned i64 to unsigned i32" : i32
diff --git a/runtime/src/iree/vm/test/global_ops.mlir b/runtime/src/iree/vm/test/global_ops.mlir
index 24ade84..263e7b5 100644
--- a/runtime/src/iree/vm/test/global_ops.mlir
+++ b/runtime/src/iree/vm/test/global_ops.mlir
@@ -22,7 +22,7 @@
   vm.func @test_global_load_ref() {
     %actual = vm.global.load.ref @g0 : !vm.buffer
     %expected = vm.const.ref.zero : !vm.buffer
-    %expecteddno = util.do_not_optimize(%expected) : !vm.buffer
+    %expecteddno = util.optimization_barrier %expected : !vm.buffer
     vm.check.eq %actual, %expecteddno : !vm.buffer
     vm.return
   }
diff --git a/runtime/src/iree/vm/test/list_ops.mlir b/runtime/src/iree/vm/test/list_ops.mlir
index 81e6b95..696be36 100644
--- a/runtime/src/iree/vm/test/list_ops.mlir
+++ b/runtime/src/iree/vm/test/list_ops.mlir
@@ -12,7 +12,7 @@
     %list = vm.list.alloc %c42 : (i32) -> !vm.list<i8>
     vm.list.reserve %list, %c100 : (!vm.list<i8>, i32)
     %sz = vm.list.size %list : (!vm.list<i8>) -> i32
-    %sz_dno = util.do_not_optimize(%sz) : i32
+    %sz_dno = util.optimization_barrier %sz : i32
     vm.check.eq %sz_dno, %c0, "list<i8>.empty.size()=0" : i32
     vm.return
   }
@@ -107,7 +107,7 @@
     %list = vm.list.alloc %c1 : (i32) -> !vm.list<i32>
     vm.list.resize %list, %c1 : (!vm.list<i32>, i32)
     %v = vm.list.get.i32 %list, %c1 : (!vm.list<i32>, i32) -> i32
-    %v_dno = util.do_not_optimize(%v) : i32
+    %v_dno = util.optimization_barrier %v : i32
     // Add a dummy use of %v_dno to please recent versions of clang for the C target
     vm.list.set.i32 %list, %c1, %v_dno : (!vm.list<i32>, i32, i32)
     vm.return
diff --git a/runtime/src/iree/vm/test/list_variant_ops.mlir b/runtime/src/iree/vm/test/list_variant_ops.mlir
index 5a8d23c..202c92e 100644
--- a/runtime/src/iree/vm/test/list_variant_ops.mlir
+++ b/runtime/src/iree/vm/test/list_variant_ops.mlir
@@ -113,7 +113,7 @@
     vm.list.resize %list, %c1 : (!vm.list<?>, i32)
 
     %ref = vm.list.get.ref %list, %c1 : (!vm.list<?>, i32) -> !vm.buffer
-    %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+    %ref_dno = util.optimization_barrier %ref : !vm.buffer
     vm.return
   }
 
diff --git a/runtime/src/iree/vm/test/ref_ops.mlir b/runtime/src/iree/vm/test/ref_ops.mlir
index 862a75e..ba6c486 100644
--- a/runtime/src/iree/vm/test/ref_ops.mlir
+++ b/runtime/src/iree/vm/test/ref_ops.mlir
@@ -6,7 +6,7 @@
   vm.export @test_zero_ref_eq
   vm.func @test_zero_ref_eq() {
     %ref = vm.const.ref.zero : !vm.ref<?>
-    %ref_dno = util.do_not_optimize(%ref) : !vm.ref<?>
+    %ref_dno = util.optimization_barrier %ref : !vm.ref<?>
     vm.check.eq %ref_dno, %ref_dno : !vm.ref<?>
     vm.return
   }
@@ -15,13 +15,13 @@
   // ordinal allocation and vm to EmitC conversion to prevent constant folding
  // of the tests during the latter. This means we would need to add a pattern
   // that inserts calls to `iree_vm_ref_retain` for operand/result pairs of the
-  // do_not_optimize op.
+  // barrier op.
   vm.export @test_ref_eq attributes {emitc.exclude}
   vm.func @test_ref_eq() {
     %ref_1 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
-    %ref_1_dno = util.do_not_optimize(%ref_1) : !vm.buffer
+    %ref_1_dno = util.optimization_barrier %ref_1 : !vm.buffer
     %ref_2 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
-    %ref_2_dno = util.do_not_optimize(%ref_2) : !vm.buffer
+    %ref_2_dno = util.optimization_barrier %ref_2 : !vm.buffer
     vm.check.eq %ref_1_dno, %ref_2_dno : !vm.buffer
     vm.return
   }
@@ -29,9 +29,9 @@
   vm.export @test_ref_ne
   vm.func @test_ref_ne() {
     %ref_i8 = vm.const.ref.rodata @buffer_i8 : !vm.buffer
-    %ref_i8_dno = util.do_not_optimize(%ref_i8) : !vm.buffer
+    %ref_i8_dno = util.optimization_barrier %ref_i8 : !vm.buffer
     %ref_i32 = vm.const.ref.rodata @buffer_i32 : !vm.buffer
-    %ref_i32_dno = util.do_not_optimize(%ref_i32) : !vm.buffer
+    %ref_i32_dno = util.optimization_barrier %ref_i32 : !vm.buffer
     vm.check.ne %ref_i8_dno, %ref_i32_dno : !vm.buffer
     vm.return
   }
@@ -39,7 +39,7 @@
   vm.export @test_ref_nz
   vm.func @test_ref_nz() {
     %ref = vm.const.ref.rodata @buffer_i8 : !vm.buffer
-    %ref_dno = util.do_not_optimize(%ref) : !vm.buffer
+    %ref_dno = util.optimization_barrier %ref : !vm.buffer
     vm.check.nz %ref_dno : !vm.buffer
     vm.return
   }
diff --git a/runtime/src/iree/vm/test/shift_ops.mlir b/runtime/src/iree/vm/test/shift_ops.mlir
index 4905ea9..b1e618d 100644
--- a/runtime/src/iree/vm/test/shift_ops.mlir
+++ b/runtime/src/iree/vm/test/shift_ops.mlir
@@ -7,7 +7,7 @@
   vm.export @test_shl_i32
   vm.func @test_shl_i32() {
     %c1 = vm.const.i32 1
-    %c1dno = util.do_not_optimize(%c1) : i32
+    %c1dno = util.optimization_barrier %c1 : i32
     %c2 = vm.const.i32 2
     %v = vm.shl.i32 %c1dno, %c2 : i32
     %c4 = vm.const.i32 4
@@ -18,7 +18,7 @@
   vm.export @test_shr_i32s
   vm.func @test_shr_i32s() {
     %cn1 = vm.const.i32 -1
-    %cn1dno = util.do_not_optimize(%cn1) : i32
+    %cn1dno = util.optimization_barrier %cn1 : i32
     %c2 = vm.const.i32 2
     %v = vm.shr.i32.s %cn1dno, %c2 : i32
     vm.check.eq %v, %cn1dno, "-1>>2=-1" : i32
@@ -28,7 +28,7 @@
   vm.export @test_shr_i32u
   vm.func @test_shr_i32u() {
     %c4 = vm.const.i32 4
-    %c4dno = util.do_not_optimize(%c4) : i32
+    %c4dno = util.optimization_barrier %c4 : i32
     %c2 = vm.const.i32 2
     %v = vm.shr.i32.u %c4dno, %c2 : i32
     %c1 = vm.const.i32 1
diff --git a/runtime/src/iree/vm/test/shift_ops_i64.mlir b/runtime/src/iree/vm/test/shift_ops_i64.mlir
index 6632b2e..00c0724 100644
--- a/runtime/src/iree/vm/test/shift_ops_i64.mlir
+++ b/runtime/src/iree/vm/test/shift_ops_i64.mlir
@@ -7,7 +7,7 @@
   vm.export @test_shl_i64
   vm.func @test_shl_i64() {
     %c1 = vm.const.i64 1
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %shamt = vm.const.i32 2
     %v = vm.shl.i64 %c1dno, %shamt : i64
     %c4 = vm.const.i64 4
@@ -18,7 +18,7 @@
   vm.export @test_shr_i64s
   vm.func @test_shr_i64s() {
     %c1 = vm.const.i64 -1
-    %c1dno = util.do_not_optimize(%c1) : i64
+    %c1dno = util.optimization_barrier %c1 : i64
     %shamt = vm.const.i32 2
     %v = vm.shr.i64.s %c1dno, %shamt : i64
     %cn1 = vm.const.i64 -1
@@ -29,7 +29,7 @@
   vm.export @test_shr_i64u
   vm.func @test_shr_i64u() {
     %c4 = vm.const.i64 4
-    %c4dno = util.do_not_optimize(%c4) : i64
+    %c4dno = util.optimization_barrier %c4 : i64
     %shamt = vm.const.i32 2
     %v = vm.shr.i64.u %c4dno, %shamt : i64
     %c1 = vm.const.i64 1
diff --git a/samples/custom_module/test/example.mlir b/samples/custom_module/test/example.mlir
index 3c9ca5e..07aa11f 100644
--- a/samples/custom_module/test/example.mlir
+++ b/samples/custom_module/test/example.mlir
@@ -60,7 +60,7 @@
    // We don't do anything with it here; it just demonstrates how index works.
     // CHECK-NEXT: LENGTH hello = 5
     %strlen = call @custom.string.length(%hello_str) : (!custom.string) -> index
-    util.do_not_optimize(%strlen) : index
+    util.optimization_barrier %strlen : index
 
     // Print "debug" if the runtime is compiled in debug mode and otherwise
     // prints "optimized".
diff --git a/tests/compiler_driver/smoketest.mlir b/tests/compiler_driver/smoketest.mlir
index 790966a..9f21cb3 100644
--- a/tests/compiler_driver/smoketest.mlir
+++ b/tests/compiler_driver/smoketest.mlir
@@ -34,7 +34,7 @@
 // CHECK: "local_name": "add"
 func.func @add() -> i32 {
   %c1 = arith.constant 1 : i32
-  %unf_c1 = util.do_not_optimize(%c1) : i32
+  %unf_c1 = util.optimization_barrier %c1 : i32
   %unf_c2 = util.unfoldable_constant 2 : i32
   %result = arith.addi %unf_c1, %unf_c2 : i32
   return %result : i32
diff --git a/tests/e2e/linalg_transform/linalg_transform.mlir b/tests/e2e/linalg_transform/linalg_transform.mlir
index e493196..0b23c46 100644
--- a/tests/e2e/linalg_transform/linalg_transform.mlir
+++ b/tests/e2e/linalg_transform/linalg_transform.mlir
@@ -26,7 +26,7 @@
   %matmul = linalg.matmul
       ins(%lhs, %rhs : tensor<5x3xf32>, tensor<3x5xf32>)
       outs(%res : tensor<5x5xf32>) -> tensor<5x5xf32>
-  %matmul_res = util.do_not_optimize(%matmul) : tensor<5x5xf32>
+  %matmul_res = util.optimization_barrier %matmul : tensor<5x5xf32>
 
   return %matmul_res : tensor<5x5xf32>
 }
diff --git a/tests/e2e/regression/dynamic_abs.mlir b/tests/e2e/regression/dynamic_abs.mlir
index b7d512d..384832d 100644
--- a/tests/e2e/regression/dynamic_abs.mlir
+++ b/tests/e2e/regression/dynamic_abs.mlir
@@ -1,7 +1,7 @@
 func.func @dynamic_tensor() {
   %input = flow.tensor.constant dense<[[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]]> : tensor<2x3xf32> -> tensor<?x?xf32>
   %res = "mhlo.abs"(%input) : (tensor<?x?xf32>) -> tensor<?x?xf32>
-  %dshape = util.do_not_optimize(%res) : tensor<?x?xf32>
+  %dshape = util.optimization_barrier %res : tensor<?x?xf32>
   %result = tensor.cast %dshape : tensor<?x?xf32> to tensor<2x3xf32>
   check.expect_almost_eq_const(%result, dense<[[1.0, 2.0, 3.0],[4.0, 5.0, 6.0]]> : tensor<2x3xf32>) : tensor<2x3xf32>
   return
diff --git a/tests/e2e/regression/dynamic_add.mlir b/tests/e2e/regression/dynamic_add.mlir
index e832da7..fca0c55 100644
--- a/tests/e2e/regression/dynamic_add.mlir
+++ b/tests/e2e/regression/dynamic_add.mlir
@@ -2,7 +2,7 @@
   %lhs = flow.tensor.constant  dense<[[1.0,2.0,3.0,4.0],[-1.0,-2.0,-3.0,-4.0]]> : tensor<2x4xf32> -> tensor<?x4xf32>
   %rhs = flow.tensor.constant  dense<[[5.0,6.0,7.0,8.0],[-5.0,-6.0,-7.0,-8.0]]> : tensor<2x4xf32> -> tensor<?x4xf32>
   %2 = "mhlo.add"(%lhs, %rhs) : (tensor<?x4xf32>, tensor<?x4xf32>) -> tensor<?x4xf32>
-  %3 = util.do_not_optimize(%2) : tensor<?x4xf32>
+  %3 = util.optimization_barrier %2 : tensor<?x4xf32>
   %result = tensor.cast %3 : tensor<?x4xf32> to tensor<2x4xf32>
   check.expect_almost_eq_const(%result, dense<[[6.0, 8.0, 10.0, 12.0],[-6.0, -8.0, -10.0, -12.0]]> : tensor<2x4xf32>) : tensor<2x4xf32>
   return
diff --git a/tests/e2e/regression/dynamic_dot.mlir b/tests/e2e/regression/dynamic_dot.mlir
index 497b11b..e7dafe9 100644
--- a/tests/e2e/regression/dynamic_dot.mlir
+++ b/tests/e2e/regression/dynamic_dot.mlir
@@ -10,7 +10,7 @@
     [10.0, 09.0, 08.0, 07.0, 06.0],
     [05.0, 04.0, 03.0, 02.0, 01.0]]> : tensor<3x5xf32> -> tensor<?x?xf32>
   %res = "mhlo.dot"(%lhs, %rhs) : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  %dshape = util.do_not_optimize(%res) : tensor<?x?xf32>
+  %dshape = util.optimization_barrier %res : tensor<?x?xf32>
   %result = tensor.cast %dshape : tensor<?x?xf32> to tensor<5x5xf32>
   check.expect_almost_eq_const(%result,
     dense<[[430.0, 388.0, 346.0, 304.0, 262.0],
diff --git a/tests/e2e/regression/dynamic_torch_index_select_high_rank.mlir b/tests/e2e/regression/dynamic_torch_index_select_high_rank.mlir
index b5689dc..6a4736d 100644
--- a/tests/e2e/regression/dynamic_torch_index_select_high_rank.mlir
+++ b/tests/e2e/regression/dynamic_torch_index_select_high_rank.mlir
@@ -2,7 +2,7 @@
   %lhs = flow.tensor.constant  dense<[[6,7],[8,9]]> : tensor<2x2xi32> -> tensor<?x?xi32>
   %rhs = flow.tensor.constant  dense<[[[[0,1],[1,0]],[[0,0],[1,1]]],[[[1,1],[0,0]],[[0,1],[1,0]]]]> : tensor<2x2x2x2xi32> -> tensor<?x?x?x?xi32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 1 : i64, dim = 1 : i64} : (tensor<?x?xi32>, tensor<?x?x?x?xi32>) -> tensor<?x?x?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?x?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?x?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?x?x?xi32> to tensor<2x2x2x2xi32>
   check.expect_eq_const(%result,
     dense<[[[[6, 7],[7, 6]],
@@ -16,7 +16,7 @@
   %lhs = flow.tensor.constant  dense<[[6,7],[8,9]]> : tensor<2x2xi32> -> tensor<?x?xi32>
   %rhs = flow.tensor.constant  dense<[[[[0,1],[1,0]],[[0,0],[1,1]]],[[[1,1],[0,0]],[[0,1],[1,0]]]]> : tensor<2x2x2x2xi32> -> tensor<?x?x?x?xi32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?xi32>, tensor<?x?x?x?xi32>) -> tensor<?x?x?x?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?x?x?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?x?x?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?x?x?x?xi32> to tensor<2x2x2x2x2xi32>
   check.expect_eq_const(%result,
     dense<[[[[[6, 7],[8, 9]],
diff --git a/tests/e2e/regression/dynamic_torch_index_select_negative.mlir b/tests/e2e/regression/dynamic_torch_index_select_negative.mlir
index feaa5d6..57ced49 100644
--- a/tests/e2e/regression/dynamic_torch_index_select_negative.mlir
+++ b/tests/e2e/regression/dynamic_torch_index_select_negative.mlir
@@ -2,7 +2,7 @@
   %lhs = flow.tensor.constant  dense<[[[100, 101],[110, 111]],[[200, 201],[210, 211]]]> : tensor<2x2x2xi32> -> tensor<?x?x?xi32>
   %rhs = flow.tensor.constant  dense<[[[0, 1],[1, 0]],[[0, 0],[1, 1]]]> : tensor<2x2x2xi32> -> tensor<?x?x?xi32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = -1 : i64, dim = -1 : i64} : (tensor<?x?x?xi32>, tensor<?x?x?xi32>) -> tensor<?x?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?x?xi32> to tensor<2x2x2xi32>
   check.expect_eq_const(%result,
     dense<[[[100, 101],[111, 110]],
diff --git a/tests/e2e/regression/dynamic_torch_index_select_scalar.mlir b/tests/e2e/regression/dynamic_torch_index_select_scalar.mlir
index c4d1391..e8aeced 100644
--- a/tests/e2e/regression/dynamic_torch_index_select_scalar.mlir
+++ b/tests/e2e/regression/dynamic_torch_index_select_scalar.mlir
@@ -7,7 +7,7 @@
            [[21,22,23,24,25]]]> : tensor<5x1x5xi32> -> tensor<?x?x?xi32>
   %rhs = util.unfoldable_constant dense<0> : tensor<i32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?x?xi32>, tensor<i32>) -> tensor<?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?xi32> to tensor<1x5xi32>
   check.expect_eq_const(%result,
     dense<[[1, 2, 3, 4, 5]]> : tensor<1x5xi32>) : tensor<1x5xi32>
@@ -23,7 +23,7 @@
            [[21,22,23,24,25]]]> : tensor<5x1x5xi32> -> tensor<?x?x?xi32>
   %rhs = util.unfoldable_constant dense<0> : tensor<i32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 0 : i64, dim = 1 : i64} : (tensor<?x?x?xi32>, tensor<i32>) -> tensor<?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?xi32> to tensor<5x5xi32>
   check.expect_eq_const(%result,
     dense<[[1, 2, 3, 4, 5],
diff --git a/tests/e2e/regression/dynamic_torch_index_select_vector.mlir b/tests/e2e/regression/dynamic_torch_index_select_vector.mlir
index 98693ad..3e78f7c 100644
--- a/tests/e2e/regression/dynamic_torch_index_select_vector.mlir
+++ b/tests/e2e/regression/dynamic_torch_index_select_vector.mlir
@@ -3,7 +3,7 @@
     dense<[[[1, 2],[3, 4]],[[5, 6],[7, 8]],[[9, 10],[11, 12]]]> : tensor<3x2x2xi32> -> tensor<?x?x?xi32>
   %rhs = flow.tensor.constant dense<[0, 1]> : tensor<2xi32> -> tensor<?xi32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 0 : i64, dim = 1 : i64} : (tensor<?x?x?xi32>, tensor<?xi32>) -> tensor<?x?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?x?xi32> to tensor<3x2x2xi32>
   check.expect_eq_const(%result,
     dense<[[[1, 2],[3, 4]],
@@ -17,7 +17,7 @@
     dense<[[[1, 2],[3, 4]],[[5, 6],[7, 8]],[[9, 10],[11, 12]]]> : tensor<3x2x2xi32> -> tensor<?x?x?xi32>
   %rhs = flow.tensor.constant dense<[0, 1]> : tensor<2xi32> -> tensor<?xi32>
   %0 = "mhlo.torch_index_select"(%lhs, %rhs) {batch_dims = 0 : i64, dim = 0 : i64} : (tensor<?x?x?xi32>, tensor<?xi32>) -> tensor<?x?x?xi32>
-  %dshape = util.do_not_optimize(%0) : tensor<?x?x?xi32>
+  %dshape = util.optimization_barrier %0 : tensor<?x?x?xi32>
   %result = tensor.cast %dshape : tensor<?x?x?xi32> to tensor<2x2x2xi32>
   check.expect_eq_const(%result,
     dense<[[[1, 2],[3, 4]],
diff --git a/tests/e2e/regression/layernorm.mlir b/tests/e2e/regression/layernorm.mlir
index 098b7b4..f165e25 100644
--- a/tests/e2e/regression/layernorm.mlir
+++ b/tests/e2e/regression/layernorm.mlir
@@ -28,8 +28,8 @@
   %cst_2 = arith.constant dense<9.99999996E-13> : tensor<128x1xf32>
   %cst_3 = arith.constant dense<3.840000e+02> : tensor<128x1xf32>
   %cst_4 = arith.constant dense<5.000000e+00> : tensor<128x384xf32>
-  %0 = util.do_not_optimize(%cst_4) : tensor<128x384xf32>
-  %1 = util.do_not_optimize(%cst_3) : tensor<128x1xf32>
+  %0 = util.optimization_barrier %cst_4 : tensor<128x384xf32>
+  %1 = util.optimization_barrier %cst_3 : tensor<128x1xf32>
   %2 = tensor.empty() : tensor<128xf32>
   %3 = linalg.fill ins(%cst_0 : f32) outs(%2 : tensor<128xf32>) -> tensor<128xf32>
   %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%0 : tensor<128x384xf32>) outs(%3 : tensor<128xf32>) {
@@ -58,7 +58,7 @@
     %16 = arith.addf %15, %arg1 : f32
     linalg.yield %16 : f32
   } -> tensor<128xf32>
-  %12 = util.do_not_optimize(%cst_2) : tensor<128x1xf32>
+  %12 = util.optimization_barrier %cst_2 : tensor<128x1xf32>
   %13 = tensor.collapse_shape %12 [[0, 1]] : tensor<128x1xf32> into tensor<128xf32>
   %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%9, %11, %7, %13 : tensor<128x384xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) outs(%8 : tensor<128x384xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
diff --git a/tests/e2e/regression/reduction_broadcast_elementwise.mlir b/tests/e2e/regression/reduction_broadcast_elementwise.mlir
index 8c546fd..d89bc3b 100644
--- a/tests/e2e/regression/reduction_broadcast_elementwise.mlir
+++ b/tests/e2e/regression/reduction_broadcast_elementwise.mlir
@@ -13,7 +13,7 @@
   %cst = arith.constant -3.40282347E+38 : f32
   %cst_0 = arith.constant dense<1.000000e+00> : tensor<12x128x128xf32>
   %cst_1 = arith.constant dense<5.000000e+00> : tensor<12x128x128xf32>
-  %0 = util.do_not_optimize(%cst_1) : tensor<12x128x128xf32>
+  %0 = util.optimization_barrier %cst_1 : tensor<12x128x128xf32>
   %1 = tensor.empty() : tensor<12x128xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<12x128xf32>) -> tensor<12x128xf32>
   %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%0 : tensor<12x128x128xf32>) outs(%2 : tensor<12x128xf32>) {
diff --git a/tests/e2e/regression/softmax.mlir b/tests/e2e/regression/softmax.mlir
index 0bbe13c..64d0795 100644
--- a/tests/e2e/regression/softmax.mlir
+++ b/tests/e2e/regression/softmax.mlir
@@ -18,7 +18,7 @@
   %cst_1 = arith.constant -3.40282347E+38 : f32
   %cst_2 = arith.constant dense<7.812500e-03> : tensor<12x128x128xf32>
   %cst_3 = arith.constant dense<5.000000e+00> : tensor<12x128x128xf32>
-  %0 = util.do_not_optimize(%cst_3) : tensor<12x128x128xf32>
+  %0 = util.optimization_barrier %cst_3 : tensor<12x128x128xf32>
   %1 = tensor.empty() : tensor<12x128xf32>
   %2 = linalg.fill ins(%cst_1 : f32) outs(%1 : tensor<12x128xf32>) -> tensor<12x128xf32>
   %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%0 : tensor<12x128x128xf32>) outs(%2 : tensor<12x128xf32>) {
diff --git a/tests/transform_dialect/cuda/softmax.mlir b/tests/transform_dialect/cuda/softmax.mlir
index 978bb5e..e7959d4 100644
--- a/tests/transform_dialect/cuda/softmax.mlir
+++ b/tests/transform_dialect/cuda/softmax.mlir
@@ -49,19 +49,19 @@
 // Execution only checks that @max_sub_exp runs.
 //      CHECK: EXEC @max_sub_exp
 //      CHECK: 16x128x128xf32=[
-// CHECK-SAME:                [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
+// CHECK-SAME:                [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 
 func.func @max_sub_exp() -> !out_tensor_t {
   %cst = arith.constant -3.40282347E+38 : f32
   %cst_0 = arith.constant dense<1121212.000000e+00> : !out_tensor_t
   %cst_1 = arith.constant dense<5.000000e+00> : !out_tensor_t
-  %0 = util.do_not_optimize(%cst_1) : !out_tensor_t
+  %0 = util.optimization_barrier %cst_1 : !out_tensor_t
 
   %1 = tensor.empty() : !tmp_tensor_t
   %2 = linalg.fill ins(%cst : f32) outs(%1 : !tmp_tensor_t) -> !tmp_tensor_t
-  %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, 
-                                        affine_map<(d0, d1, d2) -> (d0, d1)>], 
-                       iterator_types = ["parallel", "parallel", "reduction"]} 
+  %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+                                        affine_map<(d0, d1, d2) -> (d0, d1)>],
+                       iterator_types = ["parallel", "parallel", "reduction"]}
   ins(%0 : !out_tensor_t) outs(%2 : !tmp_tensor_t) {
   ^bb0(%arg0: f32, %arg1: f32):
     %8 = arith.maxf %arg0, %arg1 : f32
@@ -72,7 +72,7 @@
   %4 = tensor.empty() : !out_tensor_t
   %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                                         affine_map<(d0, d1, d2) -> (d0, d1)>,
-                                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>], 
+                                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
                        iterator_types = ["parallel", "parallel", "parallel"]}
   ins(%0, %3 : !out_tensor_t, !tmp_tensor_t) outs(%4 : !out_tensor_t) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):