Rewrite EmitC target and move logic from the custom printer into the conversion pass (#6607)

diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
index 8388fe9..5ef9709 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/CMakeLists.txt
@@ -21,6 +21,7 @@
       MLIREmitC
       MLIRIR
       MLIRPass
+      MLIRStandard
       MLIRTransforms
       iree::compiler::Dialect::IREE::Conversion::PreserveCompilerHints
       iree::compiler::Dialect::IREE::IR
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
index c4c1374..2f71bf0 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.cpp
@@ -13,6 +13,9 @@
 #include "iree/compiler/Dialect/VM/Utils/CallingConvention.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinDialect.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Pass/Pass.h"
@@ -23,15 +26,127 @@
 
 namespace {
 
-// TODO(simon-camp): This is adapted in the CModuleTarget.
-static Optional<std::string> buildFunctionName(IREE::VM::ModuleOp &moduleOp,
-                                               IREE::VM::FuncOp &funcOp) {
-  return std::string(moduleOp.getName()) + "_" + std::string(funcOp.getName()) +
-         "_impl";
+// TODO(simon-camp/marbre): Use this function throughout the conversions.
+Optional<std::string> getCType(Type type) {
+  if (auto iType = type.dyn_cast<IntegerType>()) {
+    switch (iType.getWidth()) {
+      case 32:
+      case 64:
+        return std::string("int") + std::to_string(iType.getWidth()) +
+               std::string("_t");
+    }
+  }
+
+  if (auto fType = type.dyn_cast<FloatType>()) {
+    switch (fType.getWidth()) {
+      case 32:
+        return std::string("float");
+      case 64:
+        return std::string("double");
+    }
+  }
+
+  if (auto oType = type.dyn_cast<emitc::OpaqueType>()) {
+    return std::string(oType.getValue());
+  }
+
+  return None;
 }
 
-static Optional<std::string> buildFunctionName(IREE::VM::ModuleOp &moduleOp,
-                                               IREE::VM::ImportOp &importOp) {
+LogicalResult convertFuncOp(IREE::VM::FuncOp funcOp,
+                            VMAnalysisCache &vmAnalysisCache) {
+  auto ctx = funcOp.getContext();
+  auto loc = funcOp.getLoc();
+
+  OpBuilder builder(funcOp);
+
+  auto moduleOp = funcOp.getOperation()->getParentOfType<IREE::VM::ModuleOp>();
+
+  FunctionType funcType = funcOp.getType();
+  std::string name =
+      std::string(moduleOp.getName()) + "_" + std::string(funcOp.getName());
+  std::string moduleTypeName = (moduleOp.getName() + "_t*").str();
+  std::string moduleStateTypeName = (moduleOp.getName() + "_state_t*").str();
+
+  Type stackType = mlir::emitc::OpaqueType::get(ctx, "iree_vm_stack_t*");
+  Type moduleType = mlir::emitc::OpaqueType::get(ctx, moduleTypeName);
+  Type moduleStateType = mlir::emitc::OpaqueType::get(ctx, moduleStateTypeName);
+
+  SmallVector<Type, 3> inputTypes = {stackType, moduleType, moduleStateType};
+  SmallVector<Type, 1> outputTypes;
+
+  for (auto &inputType : funcType.getInputs()) {
+    inputTypes.push_back(inputType);
+  }
+
+  for (auto &resultType : funcType.getResults()) {
+    Optional<std::string> cType = getCType(resultType);
+    if (!cType.hasValue()) {
+      return funcOp.emitError() << "unable to emit C type";
+    }
+    std::string cPtrType = cType.getValue() + std::string("*");
+    Type type = mlir::emitc::OpaqueType::get(ctx, cPtrType);
+    inputTypes.push_back(type);
+    outputTypes.push_back(type);
+  }
+
+  auto newFuncType = mlir::FunctionType::get(
+      ctx, {inputTypes}, {mlir::emitc::OpaqueType::get(ctx, "iree_status_t")});
+
+  auto newFuncOp = builder.create<mlir::FuncOp>(loc, name, newFuncType);
+  newFuncOp.getOperation()->setAttr("emitc.static", UnitAttr::get(ctx));
+
+  Optional<std::string> callingConvention = makeCallingConventionString(funcOp);
+
+  // Annotate new function with calling convention string which gets used in
+  // the CModuleTarget.
+  newFuncOp.getOperation()->setAttr(
+      "calling_convention", StringAttr::get(ctx, callingConvention.getValue()));
+
+  // This call should be equivalent to rewriter.inlineRegionBefore()
+  newFuncOp.getBody().getBlocks().splice(newFuncOp.end(),
+                                         funcOp.getBody().getBlocks());
+
+  Block &entryBlock = newFuncOp.getBlocks().front();
+
+  entryBlock.insertArgument(static_cast<unsigned>(0), stackType);
+  entryBlock.insertArgument(static_cast<unsigned>(1), moduleType);
+  entryBlock.insertArgument(static_cast<unsigned>(2), moduleStateType);
+
+  entryBlock.addArguments(outputTypes);
+
+  auto ptr = vmAnalysisCache.find(funcOp.getOperation());
+  if (ptr == vmAnalysisCache.end()) {
+    return funcOp.emitError() << "parent func op not found in cache.";
+  }
+
+  // Add constant ops for refs
+  const int numRefs =
+      ptr->second.registerAllocation.getMaxRefRegisterOrdinal() + 1;
+
+  builder.setInsertionPointToStart(&entryBlock);
+
+  for (int i = 0; i < numRefs; i++) {
+    auto refOp = builder.create<emitc::ConstantOp>(
+        /*location=*/loc,
+        /*resultType=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t"),
+        /*value=*/emitc::OpaqueAttr::get(ctx, "{0}"));
+
+    // Mark local refs so that we can release them before a return operation
+    refOp.getOperation()->setAttr("ref_ordinal", builder.getIndexAttr(i));
+  }
+
+  vmAnalysisCache.insert(
+      std::make_pair(newFuncOp.getOperation(), std::move(ptr->second)));
+
+  if (failed(funcOp.replaceAllSymbolUses(name, moduleOp)))
+    return funcOp.emitError() << "unable to update symbol name in module";
+
+  return success();
+}
+
+Optional<std::string> buildFunctionName(IREE::VM::ModuleOp &moduleOp,
+                                        IREE::VM::ImportOp &importOp) {
   auto callingConvention = makeImportCallingConventionString(importOp);
   if (!callingConvention.hasValue()) {
     return None;
@@ -94,60 +209,218 @@
   return elementTypePtrOp;
 }
 
-/// Generate two calls which resemble the IREE_RETURN_IF_ERROR macro. We need
-/// to split it here becasue we cannot produce a macro invocation with a
-/// function call as argument in emitc. Locally allocated refs will be released
-/// before the return.
-emitc::CallOp failableCall(ConversionPatternRewriter &rewriter, Location loc,
-                           StringAttr callee, ArrayAttr args,
-                           ArrayAttr templateArgs, ArrayRef<Value> operands) {
+void releaseLocalRefs(ConversionPatternRewriter &rewriter, Location location,
+                      mlir::FuncOp funcOp) {
+  auto ctx = funcOp.getContext();
+
+  // Release local refs
+  for (auto constantOp : funcOp.getOps<emitc::ConstantOp>()) {
+    Operation *op = constantOp.getOperation();
+    if (!op->hasAttr("ref_ordinal")) continue;
+
+    auto refPtrOp = rewriter.create<emitc::ApplyOp>(
+        /*location=*/location,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
+        /*applicableOperator=*/StringAttr::get(ctx, "&"),
+        /*operand=*/constantOp.getResult());
+
+    rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/TypeRange{},
+        /*callee=*/StringAttr::get(ctx, "iree_vm_ref_release"),
+        /*args=*/ArrayAttr{},
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{refPtrOp.getResult()});
+  }
+}
+
+/// Generate a call op with one result and split the current block into a
+/// continuation and failure block: a truthy result (a falsy one when
+/// `negateResult` is set) branches to the continuation block.
+emitc::CallOp failableCall(
+    ConversionPatternRewriter &rewriter, Location location, Type type,
+    StringAttr callee, ArrayAttr args, ArrayAttr templateArgs,
+    ArrayRef<Value> operands,
+    const std::function<void(emitc::CallOp &)> &failureBlockBuilder,
+    bool negateResult = false) {
   auto ctx = rewriter.getContext();
 
   auto callOp = rewriter.create<emitc::CallOp>(
-      /*location=*/loc,
-      /*type=*/emitc::OpaqueType::get(ctx, "iree_status_t"),
+      /*location=*/location,
+      /*type=*/type,
       /*callee=*/callee,
       /*args=*/args,
       /*templateArgs=*/templateArgs,
       /*operands=*/operands);
 
-  auto failOp = rewriter.create<emitc::CallOp>(
-      /*location=*/loc,
-      /*type=*/TypeRange{},
-      /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_ERROR"),
+  Type boolType = rewriter.getIntegerType(1);
+
+  auto conditionI1 = rewriter.create<emitc::CallOp>(
+      /*location=*/location,
+      /*type=*/boolType,
+      /*callee=*/StringAttr::get(ctx, "EMITC_CAST"),
       /*args=*/
-      ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                           emitc::OpaqueAttr::get(ctx, "local_refs")}),
+      ArrayAttr::get(ctx, {rewriter.getIndexAttr(0), TypeAttr::get(boolType)}),
       /*templateArgs=*/ArrayAttr{},
       /*operands=*/ArrayRef<Value>{callOp.getResult(0)});
+
+  if (negateResult)
+    conditionI1 = rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/boolType,
+        /*callee=*/StringAttr::get(ctx, "EMITC_NOT"),
+        /*args=*/ArrayAttr{},
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{conditionI1.getResult(0)});
+
+  // Start by splitting the block into two. The part before will contain the
+  // condition, and the part after will contain the continuation point.
+  Block *condBlock = rewriter.getInsertionBlock();
+  Block::iterator opPosition = rewriter.getInsertionPoint();
+  Block *continuationBlock = rewriter.splitBlock(condBlock, opPosition);
+
+  // Create a new block for the target of the failure.
+  Block *failureBlock;
+  {
+    OpBuilder::InsertionGuard guard(rewriter);
+    Region *parentRegion = condBlock->getParent();
+    failureBlock = rewriter.createBlock(parentRegion, parentRegion->end());
+
+    failureBlockBuilder(callOp);
+  }
+
+  rewriter.setInsertionPointToEnd(condBlock);
+  auto branchOp = rewriter.create<CondBranchOp>(
+      location, conditionI1.getResult(0), continuationBlock, failureBlock);
+
+  rewriter.setInsertionPoint(continuationBlock, opPosition);
+
   return callOp;
 }
 
-// TODO(simon-camp/marbre): Use this function throughout the conversions.
-Optional<std::string> getCType(Type type) {
-  if (auto iType = type.dyn_cast<IntegerType>()) {
-    switch (iType.getWidth()) {
-      case 32:
-      case 64:
-        return std::string("int") + std::to_string(iType.getWidth()) +
-               std::string("_t");
-    }
+emitc::CallOp returnIfError(ConversionPatternRewriter &rewriter,
+                            Location location, StringAttr callee,
+                            ArrayAttr args, ArrayAttr templateArgs,
+                            ArrayRef<Value> operands) {
+  auto blockBuilder = [&rewriter, &location](emitc::CallOp &callOp) {
+    auto ctx = rewriter.getContext();
+
+    Block *block = rewriter.getBlock();
+    mlir::FuncOp funcOp = cast<mlir::FuncOp>(block->getParentOp());
+
+    releaseLocalRefs(rewriter, location, funcOp);
+
+    rewriter.create<mlir::ReturnOp>(location, callOp.getResult(0));
+  };
+
+  auto ctx = rewriter.getContext();
+  Type type = emitc::OpaqueType::get(ctx, "iree_status_t");
+  return failableCall(rewriter, location, type, callee, args, templateArgs,
+                      operands, blockBuilder, /*negateResult=*/true);
+}
+
+emitc::CallOp failListNull(ConversionPatternRewriter &rewriter,
+                           Location location, Type type, StringAttr callee,
+                           ArrayAttr args, ArrayAttr templateArgs,
+                           ArrayRef<Value> operands) {
+  auto blockBuilder = [&rewriter, &location](emitc::CallOp &callOp) {
+    auto ctx = rewriter.getContext();
+
+    Block *block = rewriter.getBlock();
+    mlir::FuncOp funcOp = cast<mlir::FuncOp>(block->getParentOp());
+
+    releaseLocalRefs(rewriter, location, funcOp);
+
+    auto statusOp = rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/mlir::emitc::OpaqueType::get(ctx, "iree_status_t"),
+        /*callee=*/StringAttr::get(ctx, "iree_make_status"),
+        /*args=*/
+        ArrayAttr::get(ctx, {mlir::emitc::OpaqueAttr::get(
+                                ctx, "IREE_STATUS_INVALID_ARGUMENT")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{});
+
+    rewriter.create<mlir::ReturnOp>(location, statusOp.getResult(0));
+  };
+
+  return failableCall(rewriter, location, type, callee, args, templateArgs,
+                      operands, blockBuilder);
+}
+
+mlir::CallOp failableCall(
+    ConversionPatternRewriter &rewriter, Location location,
+    mlir::FuncOp &callee, ArrayRef<Value> operands,
+    const std::function<void(mlir::CallOp &)> &failureBlockBuilder,
+    bool negateResult = false) {
+  auto ctx = rewriter.getContext();
+
+  auto callOp = rewriter.create<mlir::CallOp>(
+      /*location=*/location,
+      /*callee=*/callee,
+      /*operands=*/operands);
+
+  Type boolType = rewriter.getIntegerType(1);
+
+  auto conditionI1 = rewriter.create<emitc::CallOp>(
+      /*location=*/location,
+      /*type=*/boolType,
+      /*callee=*/StringAttr::get(ctx, "EMITC_CAST"),
+      /*args=*/
+      ArrayAttr::get(ctx, {rewriter.getIndexAttr(0), TypeAttr::get(boolType)}),
+      /*templateArgs=*/ArrayAttr{},
+      /*operands=*/ArrayRef<Value>{callOp.getResult(0)});
+
+  if (negateResult)
+    conditionI1 = rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/boolType,
+        /*callee=*/StringAttr::get(ctx, "EMITC_NOT"),
+        /*args=*/ArrayAttr{},
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{conditionI1.getResult(0)});
+
+  // Start by splitting the block into two. The part before will contain the
+  // condition, and the part after will contain the continuation point.
+  Block *condBlock = rewriter.getInsertionBlock();
+  Block::iterator opPosition = rewriter.getInsertionPoint();
+  Block *continuationBlock = rewriter.splitBlock(condBlock, opPosition);
+
+  // Create a new block for the target of the failure.
+  Block *failureBlock;
+  {
+    OpBuilder::InsertionGuard guard(rewriter);
+    Region *parentRegion = condBlock->getParent();
+    failureBlock = rewriter.createBlock(parentRegion, parentRegion->end());
+
+    failureBlockBuilder(callOp);
   }
 
-  if (auto fType = type.dyn_cast<FloatType>()) {
-    switch (fType.getWidth()) {
-      case 32:
-        return std::string("float");
-      case 64:
-        return std::string("double");
-    }
-  }
+  rewriter.setInsertionPointToEnd(condBlock);
+  auto branchOp = rewriter.create<CondBranchOp>(
+      location, conditionI1.getResult(0), continuationBlock, failureBlock);
 
-  if (auto oType = type.dyn_cast<emitc::OpaqueType>()) {
-    return std::string(oType.getValue());
-  }
+  rewriter.setInsertionPoint(continuationBlock, opPosition);
 
-  return None;
+  return callOp;
+}
+
+mlir::CallOp returnIfError(ConversionPatternRewriter &rewriter,
+                           Location location, mlir::FuncOp &callee,
+                           ArrayRef<Value> operands) {
+  auto blockBuilder = [&rewriter, &location](mlir::CallOp &callOp) {
+    auto ctx = rewriter.getContext();
+
+    Block *block = rewriter.getBlock();
+    mlir::FuncOp funcOp = cast<mlir::FuncOp>(block->getParentOp());
+
+    releaseLocalRefs(rewriter, location, funcOp);
+
+    rewriter.create<mlir::ReturnOp>(location, callOp.getResult(0));
+  };
+
+  return failableCall(rewriter, location, callee, operands, blockBuilder,
+                      /*negateResult=*/true);
 }
 
 SmallVector<Attribute, 4> indexSequence(int64_t n, MLIRContext *ctx) {
@@ -172,11 +445,11 @@
 // Convert vm operations to emitc calls. The resultiong call has the ops
 // operands as arguments followed by an argument for every attribute.
 template <typename SrcOpTy>
-class CallOpConversion : public OpConversionPattern<SrcOpTy> {
+class GenericOpConversion : public OpConversionPattern<SrcOpTy> {
   using OpConversionPattern<SrcOpTy>::OpConversionPattern;
 
  public:
-  CallOpConversion(MLIRContext *context, StringRef funcName)
+  GenericOpConversion(MLIRContext *context, StringRef funcName)
       : OpConversionPattern<SrcOpTy>(context), funcName(funcName) {}
 
  private:
@@ -216,136 +489,178 @@
   StringRef funcName;
 };
 
-class VMCallOpConversion : public OpConversionPattern<IREE::VM::CallOp> {
+class CallOpConversion : public OpConversionPattern<IREE::VM::CallOp> {
   using OpConversionPattern<IREE::VM::CallOp>::OpConversionPattern;
 
  private:
   LogicalResult matchAndRewrite(
       IREE::VM::CallOp op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
-    auto ctx = op.getContext();
-    auto loc = op.getLoc();
-
-    auto funcOp =
-        lookupSymbolRef<IREE::VM::CallOp, IREE::VM::FuncOp>(op, "callee");
-    auto importOp =
+    mlir::FuncOp funcOp =
+        lookupSymbolRef<IREE::VM::CallOp, mlir::FuncOp>(op, "callee");
+    IREE::VM::ImportOp importOp =
         lookupSymbolRef<IREE::VM::CallOp, IREE::VM::ImportOp>(op, "callee");
 
-    assert(funcOp || importOp);
-    assert(!(funcOp && importOp));
+    if (!funcOp && !importOp)
+      return op.emitError() << "lookup of callee failed";
 
-    auto moduleOp =
-        funcOp ? funcOp.getOperation()->getParentOfType<IREE::VM::ModuleOp>()
-               : importOp.getOperation()->getParentOfType<IREE::VM::ModuleOp>();
+    if (funcOp && importOp)
+      return op.emitError() << "lookup of callee ambiguous";
 
     const bool isImported = importOp != nullptr;
 
-    Optional<std::string> funcName = isImported
-                                         ? buildFunctionName(moduleOp, importOp)
-                                         : buildFunctionName(moduleOp, funcOp);
+    return isImported ? rewriteImportedCall(op, operands, rewriter, importOp)
+                      : rewriteInternalCall(op, operands, rewriter, funcOp);
+  }
 
-    if (op.getNumResults() > 1) {
-      return op.emitError()
-             << "only internal calls with at most one result supported for now";
-    }
+  LogicalResult rewriteInternalCall(IREE::VM::CallOp op,
+                                    ArrayRef<Value> operands,
+                                    ConversionPatternRewriter &rewriter,
+                                    mlir::FuncOp funcOp) const {
+    auto ctx = op.getContext();
+    auto loc = op.getLoc();
 
-    if (!funcName.hasValue()) {
-      return op.emitError() << "Couldn't build function name";
-    }
+    SmallVector<Value, 4> updatedOperands;
+    SmallVector<Value, 4> resultOperands;
 
-    SmallVector<Value, 4> updatedOperands(operands.begin(), operands.end());
+    auto parentFuncOp = op.getOperation()->getParentOfType<mlir::FuncOp>();
 
-    if (op.getNumResults() == 0) {
-      SmallVector<Attribute, 4> args;
-      if (!isImported) {
-        // The order is arguments, results, stack, state
-        args = indexSequence(updatedOperands.size(), ctx);
-        args.push_back(emitc::OpaqueAttr::get(ctx, "stack"));
-        args.push_back(emitc::OpaqueAttr::get(ctx, "state"));
-      } else {
-        int importOrdinal = 0;
-        // TODO(simon-camp): split into multiple EmitC ops
-        std::string importArg = std::string("&state->imports[") +
-                                std::to_string(importOrdinal) +
-                                std::string("]");
-        // The order is stack, import, arguments, results
-        args.push_back(emitc::OpaqueAttr::get(ctx, "stack"));
-        args.push_back(emitc::OpaqueAttr::get(ctx, importArg));
+    BlockArgument stackArg = parentFuncOp.getArgument(0);
+    BlockArgument moduleArg = parentFuncOp.getArgument(1);
+    BlockArgument moduleStateArg = parentFuncOp.getArgument(2);
 
-        for (auto operandIndex : indexSequence(updatedOperands.size(), ctx)) {
-          args.push_back(operandIndex);
-        }
-      }
+    updatedOperands = {stackArg, moduleArg, moduleStateArg};
 
-      auto callOp = failableCall(
-          /*rewriter=*/rewriter,
+    for (const Value &operand : operands) updatedOperands.push_back(operand);
+
+    // Create a variable for every result and a pointer to it as output
+    // parameter to the call.
+    for (OpResult result : op.getResults()) {
+      auto resultOp = rewriter.create<emitc::ConstantOp>(
           /*location=*/loc,
-          /*callee=*/StringAttr::get(ctx, funcName.getValue()),
-          /*args=*/rewriter.getArrayAttr(args),
-          /*templateArgs=*/ArrayAttr{},
-          /*operands=*/updatedOperands);
-
-      rewriter.eraseOp(op);
-
-      return success();
-    }
-
-    Type resultType = op.getType(0);
-    Optional<std::string> cType = getCType(resultType);
-
-    if (!cType.hasValue()) {
-      return op.emitError() << "unable to emit C type";
-    }
-
-    if (op.getNumResults() == 1) {
-      std::string cPtrType = cType.getValue() + std::string("*");
-
-      auto constantOp = rewriter.replaceOpWithNewOp<emitc::ConstantOp>(
-          /*op=*/op,
-          /*resultType=*/resultType,
+          /*resultType=*/result.getType(),
           /*value=*/emitc::OpaqueAttr::get(ctx, ""));
 
-      auto ptrOp = rewriter.create<emitc::ApplyOp>(
+      Optional<std::string> cType = getCType(result.getType());
+      if (!cType.hasValue()) return op.emitError() << "unable to emit C type";
+
+      std::string cPtrType = cType.getValue() + std::string("*");
+      auto resultPtrOp = rewriter.create<emitc::ApplyOp>(
           /*location=*/loc,
-          /*result=*/emitc::OpaqueType::get(ctx, cPtrType),
+          /*type=*/emitc::OpaqueType::get(ctx, cPtrType),
           /*applicableOperator=*/StringAttr::get(ctx, "&"),
-          /*operand=*/constantOp.getResult());
+          /*operand=*/resultOp);
 
-      updatedOperands.push_back(ptrOp.getResult());
-
-      SmallVector<Attribute, 4> args;
-      if (!isImported) {
-        // The order is arguments, results, stack, state
-        args = indexSequence(updatedOperands.size(), ctx);
-        args.push_back(emitc::OpaqueAttr::get(ctx, "stack"));
-        args.push_back(emitc::OpaqueAttr::get(ctx, "state"));
-      } else {
-        int importOrdinal = 0;
-        // TODO(simon-camp): split into multiple EmitC ops
-        std::string importArg = std::string("&state->imports[") +
-                                std::to_string(importOrdinal) +
-                                std::string("]");
-        // The order is stack, import, arguments, results
-        args.push_back(emitc::OpaqueAttr::get(ctx, "stack"));
-        args.push_back(emitc::OpaqueAttr::get(ctx, importArg));
-
-        for (auto operandIndex : indexSequence(updatedOperands.size(), ctx)) {
-          args.push_back(operandIndex);
-        }
-      }
-
-      auto callOp = failableCall(
-          /*rewriter=*/rewriter,
-          /*location=*/loc,
-          /*callee=*/StringAttr::get(ctx, funcName.getValue()),
-          /*args=*/rewriter.getArrayAttr(args),
-          /*templateArgs=*/ArrayAttr{},
-          /*operands=*/updatedOperands);
-
-      return success();
+      resultOperands.push_back(resultOp.getResult());
+      updatedOperands.push_back(resultPtrOp.getResult());
     }
 
-    return failure();
+    auto callOp = returnIfError(
+        /*rewriter=*/rewriter,
+        /*location=*/loc,
+        /*callee=*/funcOp,
+        /*operands=*/updatedOperands);
+
+    for (auto &pair : llvm::enumerate(op.getResults())) {
+      size_t index = pair.index();
+      OpResult result = pair.value();
+      result.replaceAllUsesWith(resultOperands[index]);
+    }
+
+    rewriter.eraseOp(op);
+
+    return success();
+  }
+
+  LogicalResult rewriteImportedCall(IREE::VM::CallOp op,
+                                    ArrayRef<Value> operands,
+                                    ConversionPatternRewriter &rewriter,
+                                    IREE::VM::ImportOp importOp) const {
+    auto ctx = op.getContext();
+    auto loc = op.getLoc();
+
+    SmallVector<Value, 4> updatedOperands;
+    SmallVector<Value, 4> resultOperands;
+
+    auto moduleOp =
+        importOp.getOperation()->getParentOfType<IREE::VM::ModuleOp>();
+
+    Optional<std::string> funcName = buildFunctionName(moduleOp, importOp);
+
+    if (!funcName.hasValue())
+      return op.emitError() << "Couldn't build name to imported function";
+
+    int importOrdinal = importOp.ordinal().getValue().getZExtValue();
+
+    auto funcOp = op.getOperation()->template getParentOfType<mlir::FuncOp>();
+    BlockArgument stackArg = funcOp.getArgument(0);
+    BlockArgument stateArg = funcOp.getArgument(2);
+
+    auto imports = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_function_t*"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             emitc::OpaqueAttr::get(ctx, "imports")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{stateArg});
+
+    auto import = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_function_t*"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_ARRAY_ELEMENT_ADDRESS"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             rewriter.getUI32IntegerAttr(importOrdinal)}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{imports.getResult(0)});
+
+    updatedOperands = {stackArg, import.getResult(0)};
+
+    for (const Value &operand : operands) updatedOperands.push_back(operand);
+
+    // Create a variable for every result and a pointer to it as output
+    // parameter to the call.
+    for (OpResult result : op.getResults()) {
+      auto resultOp = rewriter.create<emitc::ConstantOp>(
+          /*location=*/loc,
+          /*resultType=*/result.getType(),
+          /*value=*/emitc::OpaqueAttr::get(ctx, ""));
+
+      Optional<std::string> cType = getCType(result.getType());
+      if (!cType.hasValue()) return op.emitError() << "unable to emit C type";
+
+      std::string cPtrType = cType.getValue() + std::string("*");
+      auto resultPtrOp = rewriter.create<emitc::ApplyOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, cPtrType),
+          /*applicableOperator=*/StringAttr::get(ctx, "&"),
+          /*operand=*/resultOp);
+
+      resultOperands.push_back(resultOp.getResult());
+      updatedOperands.push_back(resultPtrOp.getResult());
+    }
+
+    auto callOp = returnIfError(
+        /*rewriter=*/rewriter,
+        /*location=*/loc,
+        /*callee=*/StringAttr::get(ctx, funcName.getValue()),
+        /*args=*/ArrayAttr{},
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/updatedOperands);
+
+    for (auto &pair : llvm::enumerate(op.getResults())) {
+      size_t index = pair.index();
+      OpResult result = pair.value();
+      result.replaceAllUsesWith(resultOperands[index]);
+    }
+
+    rewriter.eraseOp(op);
+
+    return success();
+
+    return op.emitError() << "calls to imported function not supported yet";
   }
 };
 
@@ -368,7 +683,7 @@
     auto loc = cmpOp.getLoc();
 
     auto funcOp =
-        cmpOp.getOperation()->template getParentOfType<IREE::VM::FuncOp>();
+        cmpOp.getOperation()->template getParentOfType<mlir::FuncOp>();
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
       return cmpOp.emitError() << "parent func op not found in cache.";
@@ -433,7 +748,7 @@
     auto ctx = cmpOp.getContext();
     auto loc = cmpOp.getLoc();
 
-    auto funcOp = cmpOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
+    auto funcOp = cmpOp.getOperation()->getParentOfType<mlir::FuncOp>();
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
       return cmpOp.emitError() << "parent func op not found in cache.";
@@ -519,7 +834,7 @@
     auto loc = constRefZeroOp.getLoc();
 
     auto funcOp =
-        constRefZeroOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
+        constRefZeroOp.getOperation()->getParentOfType<mlir::FuncOp>();
 
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
@@ -530,15 +845,28 @@
     int32_t ordinal =
         registerAllocation.mapToRegister(constRefZeroOp.getResult()).ordinal();
 
-    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::CallOp>(
+    emitc::ConstantOp refOp = nullptr;
+    for (auto constantOp : funcOp.getOps<emitc::ConstantOp>()) {
+      Operation *op = constantOp.getOperation();
+      if (!op->hasAttr("ref_ordinal")) continue;
+      if (op->getAttr("ref_ordinal")
+              .cast<IntegerAttr>()
+              .getValue()
+              .getZExtValue() == ordinal) {
+        refOp = constantOp;
+        break;
+      }
+    }
+
+    if (!refOp)
+      return constRefZeroOp.emitError()
+             << "Corresponding ref for ordinal '" << ordinal << "' not found";
+
+    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::ApplyOp>(
         /*op=*/constRefZeroOp,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
-        /*callee=*/StringAttr::get(ctx, "VM_ARRAY_ELEMENT_ADDRESS"),
-        /*args=*/
-        ArrayAttr::get(ctx, {emitc::OpaqueAttr::get(ctx, "local_refs"),
-                             rewriter.getI32IntegerAttr(ordinal)}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{});
+        /*applicableOperator=*/StringAttr::get(ctx, "&"),
+        /*operand=*/refOp);
 
     rewriter.create<emitc::CallOp>(
         /*location=*/loc,
@@ -576,20 +904,31 @@
       return constRefRodataOp.emitError() << "Unable to find RodataOp";
     }
 
-    // TODO(simon-camp): We can't represent structs in emitc (yet maybe), so
-    // the buffer where rodatas live after code generation as well as the
-    // state struct argument name are hardcoded here.
+    auto funcOp = constRefRodataOp.getOperation()
+                      ->template getParentOfType<mlir::FuncOp>();
+
+    BlockArgument stateArg = funcOp.getArgument(2);
+    auto rodataBuffersPtr = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_buffer_t*"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             emitc::OpaqueAttr::get(ctx, "rodata_buffers")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{stateArg});
+
     auto byteBufferPtrOp = rewriter.create<emitc::CallOp>(
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_buffer_t*"),
-        /*callee=*/StringAttr::get(ctx, "VM_ARRAY_ELEMENT_ADDRESS"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_ARRAY_ELEMENT_ADDRESS"),
         /*args=*/
         ArrayAttr::get(ctx,
-                       {emitc::OpaqueAttr::get(ctx, "state->rodata_buffers"),
+                       {rewriter.getIndexAttr(0),
                         rewriter.getUI32IntegerAttr(static_cast<uint32_t>(
                             rodataOp.ordinal().getValue().getZExtValue()))}),
         /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{});
+        /*operands=*/ArrayRef<Value>{rodataBuffersPtr.getResult(0)});
 
     auto typeIdOp = rewriter.create<emitc::CallOp>(
         /*location=*/loc,
@@ -599,9 +938,6 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{});
 
-    auto funcOp =
-        constRefRodataOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
-
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
       return constRefRodataOp.emitError()
@@ -613,19 +949,30 @@
         registerAllocation.mapToRegister(constRefRodataOp.getResult())
             .ordinal();
 
-    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::CallOp>(
-        /*op=*/constRefRodataOp,
-        /*type=*/
-        emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
-        // /*type=*/typeConverter->convertType(constRefRodataOp.getResult().getType()),
-        /*callee=*/StringAttr::get(ctx, "VM_ARRAY_ELEMENT_ADDRESS"),
-        /*args=*/
-        ArrayAttr::get(ctx, {emitc::OpaqueAttr::get(ctx, "local_refs"),
-                             rewriter.getI32IntegerAttr(ordinal)}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{});
+    emitc::ConstantOp refOp = nullptr;
+    for (auto constantOp : funcOp.getOps<emitc::ConstantOp>()) {
+      Operation *op = constantOp.getOperation();
+      if (!op->hasAttr("ref_ordinal")) continue;
+      if (op->getAttr("ref_ordinal")
+              .cast<IntegerAttr>()
+              .getValue()
+              .getZExtValue() == ordinal) {
+        refOp = constantOp;
+        break;
+      }
+    }
 
-    failableCall(
+    if (!refOp)
+      return constRefRodataOp.emitError()
+             << "Corresponding ref for ordinal '" << ordinal << "' not found";
+
+    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::ApplyOp>(
+        /*op=*/constRefRodataOp,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
+        /*applicableOperator=*/StringAttr::get(ctx, "&"),
+        /*operand=*/refOp);
+
+    returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_ref_wrap_retain"),
@@ -633,7 +980,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/
         ArrayRef<Value>{byteBufferPtrOp.getResult(0), typeIdOp.getResult(0),
-                        refPtrOp.getResult(0)});
+                        refPtrOp.getResult()});
 
     return success();
   }
@@ -641,6 +988,218 @@
   VMAnalysisCache &vmAnalysisCache;
 };
 
+// Rewrites IREE::VM::BranchOp into mlir::BranchOp, reusing the original
+// destination block and destination operands.
+class BranchOpConversion : public OpConversionPattern<IREE::VM::BranchOp> {
+  using OpConversionPattern<IREE::VM::BranchOp>::OpConversionPattern;
+
+ private:
+  LogicalResult matchAndRewrite(
+      IREE::VM::BranchOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    // NOTE(review): the successor operands are read from `op` instead of the
+    // adapted `operands`; confirm that this is intended for converted types.
+    // No context, location or helper types are needed for this 1:1 rewrite.
+    rewriter.replaceOpWithNewOp<mlir::BranchOp>(op, op.dest(),
+                                                op.destOperands());
+
+    return success();
+  }
+};
+
+// Converts IREE::VM::CondBranchOp into mlir::CondBranchOp. The condition goes
+// through a CmpNZI32Op whose i32 result is cast to i1 by an EMITC_CAST call.
+class CondBranchOpConversion
+    : public OpConversionPattern<IREE::VM::CondBranchOp> {
+  using OpConversionPattern<IREE::VM::CondBranchOp>::OpConversionPattern;
+
+ private:
+  LogicalResult matchAndRewrite(
+      IREE::VM::CondBranchOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    auto context = op.getContext();
+    auto location = op.getLoc();
+    Type i1Type = rewriter.getIntegerType(1);
+
+    auto nonZeroOp = rewriter.create<IREE::VM::CmpNZI32Op>(
+        location, rewriter.getI32Type(), op.condition());
+    ArrayAttr castArgs = ArrayAttr::get(
+        context, {rewriter.getIndexAttr(0), TypeAttr::get(i1Type)});
+    auto castOp = rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/i1Type,
+        /*callee=*/StringAttr::get(context, "EMITC_CAST"),
+        /*args=*/castArgs,
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{nonZeroOp.getResult()});
+
+    rewriter.replaceOpWithNewOp<mlir::CondBranchOp>(
+        op, castOp.getResult(0), op.trueDest(), op.trueDestOperands(),
+        op.falseDest(), op.falseDestOperands());
+    return success();
+  }
+};
+
+// Converts IREE::VM::ReturnOp into mlir::ReturnOp: releases local refs, stores
+// every returned value through the matching trailing function argument with
+// EMITC_DEREF_ASSIGN and returns the result of iree_ok_status().
+class ReturnOpConversion : public OpConversionPattern<IREE::VM::ReturnOp> {
+  using OpConversionPattern<IREE::VM::ReturnOp>::OpConversionPattern;
+
+ private:
+  LogicalResult matchAndRewrite(
+      IREE::VM::ReturnOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    auto context = op.getContext();
+    auto location = op.getLoc();
+    auto parentFunc = op.getOperation()->getParentOfType<mlir::FuncOp>();
+
+    releaseLocalRefs(rewriter, location, parentFunc);
+
+    // The output slots are the last `operands.size()` function arguments.
+    const unsigned int outputBase =
+        parentFunc.getNumArguments() - operands.size();
+    for (unsigned int i = 0, e = operands.size(); i != e; ++i) {
+      Value returned = operands[i];
+      BlockArgument outputSlot = parentFunc.getArgument(outputBase + i);
+
+      if (returned.getType().isa<IREE::VM::RefType>()) {
+        return op.emitError("ref types are not supported as function results.");
+      }
+
+      rewriter.create<emitc::CallOp>(
+          /*location=*/location,
+          /*type=*/TypeRange{},
+          /*callee=*/StringAttr::get(context, "EMITC_DEREF_ASSIGN"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{outputSlot, returned});
+    }
+
+    auto okStatus = rewriter.create<emitc::CallOp>(
+        /*location=*/location,
+        /*type=*/emitc::OpaqueType::get(context, "iree_status_t"),
+        /*callee=*/StringAttr::get(context, "iree_ok_status"),
+        /*args=*/ArrayAttr{},
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{});
+    rewriter.replaceOpWithNewOp<mlir::ReturnOp>(op, okStatus.getResult(0));
+
+    return success();
+  }
+};
+
+// Lowers IREE::VM::FailOp to a branch on its status: one new block returns
+// iree_ok_status(), the other returns an iree_status_allocate_f() status that
+// carries the op's message. Both blocks release the local refs first.
+class FailOpConversion : public OpConversionPattern<IREE::VM::FailOp> {
+  using OpConversionPattern<IREE::VM::FailOp>::OpConversionPattern;
+
+ private:
+  LogicalResult matchAndRewrite(
+      IREE::VM::FailOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    auto ctx = op.getContext();
+    auto loc = op.getLoc();
+    auto funcOp = op.getOperation()->getParentOfType<mlir::FuncOp>();
+
+    Region *parentRegion = rewriter.getInsertionBlock()->getParent();
+    Block *passthroughBlock;
+    {
+      OpBuilder::InsertionGuard guard(rewriter);
+      passthroughBlock =
+          rewriter.createBlock(parentRegion, parentRegion->end());
+      releaseLocalRefs(rewriter, loc, funcOp);
+
+      auto status = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "iree_status_t"),
+          /*callee=*/StringAttr::get(ctx, "iree_ok_status"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{});
+      rewriter.create<mlir::ReturnOp>(loc, status.getResult(0));
+    }
+    Block *failureBlock;
+    {
+      OpBuilder::InsertionGuard guard(rewriter);
+      failureBlock = rewriter.createBlock(parentRegion, parentRegion->end());
+      releaseLocalRefs(rewriter, loc, funcOp);
+
+      // The message is emitted verbatim as a C string literal, so quotes,
+      // backslashes and newlines have to be escaped to keep it well-formed.
+      std::string message = "\"";
+      for (char c : op.message().getValueOr("")) {
+        if (c == '"' || c == '\\' || c == '\n') message += '\\';
+        message += (c == '\n') ? 'n' : c;
+      }
+      message += '"';
+
+      auto messageOp = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "iree_string_view_t"),
+          /*callee=*/StringAttr::get(ctx, "iree_make_cstring_view"),
+          /*args=*/
+          ArrayAttr::get(ctx, {mlir::emitc::OpaqueAttr::get(ctx, message)}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{});
+
+      auto messageSizeOp = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "iree_host_size_t"),
+          /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_MEMBER"),
+          /*args=*/
+          ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                               emitc::OpaqueAttr::get(ctx, "size")}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{messageOp.getResult(0)});
+
+      auto messageSizeIntOp = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "int"),
+          /*callee=*/StringAttr::get(ctx, "EMITC_CAST"),
+          /*args=*/
+          ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                               emitc::OpaqueAttr::get(ctx, "int")}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{messageSizeOp.getResult(0)});
+
+      auto messageDataOp = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "const char*"),
+          /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_MEMBER"),
+          /*args=*/
+          ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                               emitc::OpaqueAttr::get(ctx, "data")}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{messageOp.getResult(0)});
+
+      auto status = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/emitc::OpaqueType::get(ctx, "iree_status_t"),
+          /*callee=*/StringAttr::get(ctx, "iree_status_allocate_f"),
+          /*args=*/
+          ArrayAttr::get(ctx,
+                         {mlir::emitc::OpaqueAttr::get(
+                              ctx, "IREE_STATUS_FAILED_PRECONDITION"),
+                          mlir::emitc::OpaqueAttr::get(ctx, "\"<vm>\""),
+                          rewriter.getI32IntegerAttr(0),
+                          mlir::emitc::OpaqueAttr::get(ctx, "\"%.*s\""),
+                          rewriter.getIndexAttr(0), rewriter.getIndexAttr(1)}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{messageSizeIntOp.getResult(0),
+                          messageDataOp.getResult(0)});
+      rewriter.create<mlir::ReturnOp>(loc, status.getResult(0));
+    }
+
+    rewriter.replaceOpWithNewOp<IREE::VM::CondBranchOp>(
+        op, op.status(), failureBlock, passthroughBlock);
+
+    return success();
+  }
+};
+
 template <typename LoadOpTy, typename GlobalOpTy>
 class GlobalLoadOpConversion : public OpConversionPattern<LoadOpTy> {
   using OpConversionPattern<LoadOpTy>::OpConversionPattern;
@@ -654,6 +1213,7 @@
       LoadOpTy loadOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     auto ctx = loadOp.getContext();
+    auto loc = loadOp.getLoc();
 
     GlobalOpTy globalOp =
         lookupSymbolRef<LoadOpTy, GlobalOpTy>(loadOp, "global");
@@ -661,20 +1221,31 @@
       return loadOp.emitError() << "Unable to find GlobalOp";
     }
 
-    auto type = loadOp.getOperation()->getResultTypes();
-    StringAttr callee = StringAttr::get(ctx, funcName);
+    auto funcOp =
+        loadOp.getOperation()->template getParentOfType<mlir::FuncOp>();
 
-    // TODO(simon-camp): We can't represent structs in emitc (yet maybe), so
-    // the buffer where globals live after code generation as well as the
-    // state struct argument name are hardcoded here.
-    ArrayAttr args = rewriter.getArrayAttr(
-        {emitc::OpaqueAttr::get(ctx, "state->rwdata"),
-         rewriter.getUI32IntegerAttr(static_cast<uint32_t>(
-             globalOp.ordinal().getValue().getZExtValue()))});
-    ArrayAttr templateArgs;
+    BlockArgument stateArg = funcOp.getArgument(2);
+    auto rwDataPtr = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "uint8_t*"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             emitc::OpaqueAttr::get(ctx, "rwdata")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{stateArg});
 
-    rewriter.replaceOpWithNewOp<emitc::CallOp>(loadOp, type, callee, args,
-                                               templateArgs, operands);
+    rewriter.replaceOpWithNewOp<emitc::CallOp>(
+        /*op=*/loadOp,
+        /*type=*/loadOp.getOperation()->getResultTypes(),
+        /*callee=*/StringAttr::get(ctx, funcName),
+        /*args=*/
+        rewriter.getArrayAttr(
+            {rewriter.getIndexAttr(0),
+             rewriter.getUI32IntegerAttr(static_cast<uint32_t>(
+                 globalOp.ordinal().getValue().getZExtValue()))}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{rwDataPtr.getResult(0)});
 
     return success();
   }
@@ -695,6 +1266,7 @@
       StoreOpTy storeOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
     auto ctx = storeOp.getContext();
+    auto loc = storeOp.getLoc();
 
     GlobalOpTy globalOp =
         lookupSymbolRef<StoreOpTy, GlobalOpTy>(storeOp, "global");
@@ -702,21 +1274,32 @@
       return storeOp.emitError() << "Unable to find GlobalOp";
     }
 
-    auto type = storeOp.getOperation()->getResultTypes();
-    StringAttr callee = StringAttr::get(ctx, funcName);
+    auto funcOp =
+        storeOp.getOperation()->template getParentOfType<mlir::FuncOp>();
 
-    // TODO(simon-camp): We can't represent structs in emitc (yet maybe), so
-    // the buffer where globals live after code generation as well as the
-    // state struct argument name are hardcoded here.
-    ArrayAttr args = rewriter.getArrayAttr(
-        {emitc::OpaqueAttr::get(ctx, "state->rwdata"),
-         rewriter.getUI32IntegerAttr(static_cast<uint32_t>(
-             globalOp.ordinal().getValue().getZExtValue())),
-         rewriter.getIndexAttr(0)});
-    ArrayAttr templateArgs;
+    BlockArgument stateArg = funcOp.getArgument(2);
+    auto rwDataPtr = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "uint8_t*"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             emitc::OpaqueAttr::get(ctx, "rwdata")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{stateArg});
 
-    rewriter.replaceOpWithNewOp<emitc::CallOp>(storeOp, type, callee, args,
-                                               templateArgs, operands);
+    rewriter.replaceOpWithNewOp<emitc::CallOp>(
+        /*op=*/storeOp,
+        /*type=*/storeOp.getOperation()->getResultTypes(),
+        /*callee=*/StringAttr::get(ctx, funcName),
+        /*args=*/
+        rewriter.getArrayAttr(
+            {rewriter.getIndexAttr(0),
+             rewriter.getUI32IntegerAttr(static_cast<uint32_t>(
+                 globalOp.ordinal().getValue().getZExtValue())),
+             rewriter.getIndexAttr(1)}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{rwDataPtr.getResult(0), operands[0]});
 
     return success();
   }
@@ -759,7 +1342,8 @@
         /*applicableOperator=*/StringAttr::get(ctx, "*"),
         /*operand=*/listOperand);
 
-    auto listDerefOp = rewriter.create<emitc::CallOp>(
+    auto listDerefOp = failListNull(
+        /*rewriter=*/rewriter,
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"),
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_deref"),
@@ -767,16 +1351,6 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{refOp.getResult()});
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_LIST_NULL"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                             emitc::OpaqueAttr::get(ctx, "local_refs")}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{listDerefOp.getResult(0)});
-
     // Replace the one list argument (which is wrapped in a ref) with the
     // unwrapped list.
     SmallVector<Value, 4> updatedOperands;
@@ -789,7 +1363,7 @@
     }
 
     if (failable) {
-      auto callOp = failableCall(
+      auto callOp = returnIfError(
           /*rewriter=*/rewriter,
           /*location=*/loc,
           /*callee=*/StringAttr::get(ctx, funcName),
@@ -834,20 +1408,6 @@
   LogicalResult matchAndRewrite(
       IREE::VM::ListAllocOp allocOp, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const override {
-    // clang-format off
-    // The generated c code looks roughly like this.
-    // iree_vm_type_def_t element_type = iree_vm_type_def_make_value_type(IREE_VM_VALUE_TYPE_I32);
-    // iree_vm_type_def_t* element_type_ptr = &element_type;
-    // iree_vm_list_t* list = NULL;
-    // iree_vm_list_t** list_ptr = &list;
-    // iree_status_t status = iree_vm_list_create(element_type_ptr, {initial_capacity}, state->allocator, list_ptr);
-    // VM_RETURN_IF_ERROR(status);
-    // iree_vm_ref_t* ref_ptr = &local_refs[{ordinal}];
-    // iree_vm_ref_type_t ref_type = iree_vm_list_type_id();
-    // iree_status_t status2 = iree_vm_ref_wrap_assign(list, ref_type, ref_ptr));
-    // VM_RETURN_IF_ERROR(status2);
-    // clang-format on
-
     auto ctx = allocOp.getContext();
     auto loc = allocOp.getLoc();
 
@@ -881,20 +1441,29 @@
         /*applicableOperator=*/StringAttr::get(ctx, "&"),
         /*operand=*/listOp.getResult());
 
-    failableCall(
+    auto funcOp =
+        allocOp.getOperation()->template getParentOfType<mlir::FuncOp>();
+
+    BlockArgument stateArg = funcOp.getArgument(2);
+    auto allocatorOp = rewriter.create<emitc::CallOp>(
+        /*location=*/loc,
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_allocator_t"),
+        /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+        /*args=*/
+        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                             emitc::OpaqueAttr::get(ctx, "allocator")}),
+        /*templateArgs=*/ArrayAttr{},
+        /*operands=*/ArrayRef<Value>{stateArg});
+
+    returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_create"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0), rewriter.getIndexAttr(1),
-                             emitc::OpaqueAttr::get(ctx, "state->allocator"),
-                             rewriter.getIndexAttr(2)}),
+        /*args=*/ArrayAttr{},
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/
         ArrayRef<Value>{elementTypePtrOp.getValue().getResult(), operands[0],
-                        listPtrOp.getResult()});
-
-    auto funcOp = allocOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
+                        allocatorOp.getResult(0), listPtrOp.getResult()});
 
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
@@ -905,16 +1474,28 @@
     int32_t ordinal =
         registerAllocation.mapToRegister(allocOp.getResult()).ordinal();
 
-    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::CallOp>(
+    emitc::ConstantOp refOp = nullptr;
+    for (auto constantOp : funcOp.getOps<emitc::ConstantOp>()) {
+      Operation *op = constantOp.getOperation();
+      if (!op->hasAttr("ref_ordinal")) continue;
+      if (op->getAttr("ref_ordinal")
+              .cast<IntegerAttr>()
+              .getValue()
+              .getZExtValue() == ordinal) {
+        refOp = constantOp;
+        break;
+      }
+    }
+
+    if (!refOp)
+      return allocOp.emitError()
+             << "Corresponding ref for ordinal '" << ordinal << "' not found";
+
+    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::ApplyOp>(
         /*op=*/allocOp,
-        // /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
-        /*type=*/convertedType,
-        /*callee=*/StringAttr::get(ctx, "VM_ARRAY_ELEMENT_ADDRESS"),
-        /*args=*/
-        ArrayAttr::get(ctx, {emitc::OpaqueAttr::get(ctx, "local_refs"),
-                             rewriter.getI32IntegerAttr(ordinal)}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{});
+        /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
+        /*applicableOperator=*/StringAttr::get(ctx, "&"),
+        /*operand=*/refOp);
 
     auto refTypeOp = rewriter.create<emitc::CallOp>(
         /*location=*/loc,
@@ -924,7 +1505,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{});
 
-    failableCall(
+    returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_ref_wrap_assign"),
@@ -932,7 +1513,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/
         ArrayRef<Value>{listOp.getResult(), refTypeOp.getResult(0),
-                        refPtrOp.getResult(0)});
+                        refPtrOp.getResult()});
 
     return success();
   }
@@ -989,7 +1570,8 @@
         /*applicableOperator=*/StringAttr::get(ctx, "*"),
         /*operand=*/operands[0]);
 
-    auto listDerefOp = rewriter.create<emitc::CallOp>(
+    auto listDerefOp = failListNull(
+        /*rewriter=*/rewriter,
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"),
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_deref"),
@@ -997,17 +1579,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{refOp.getResult()});
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_LIST_NULL"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                             emitc::OpaqueAttr::get(ctx, "local_refs")}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{listDerefOp.getResult(0)});
-
-    auto getValueOp = failableCall(
+    auto getValueOp = returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_get_value_as"),
@@ -1049,31 +1621,22 @@
     auto ctx = getOp.getContext();
     auto loc = getOp.getLoc();
 
-    auto refOp = rewriter.create<emitc::ApplyOp>(
+    auto listRefOp = rewriter.create<emitc::ApplyOp>(
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t"),
         /*applicableOperator=*/StringAttr::get(ctx, "*"),
         /*operand=*/operands[0]);
 
-    auto listDerefOp = rewriter.create<emitc::CallOp>(
+    auto listDerefOp = failListNull(
+        /*rewriter=*/rewriter,
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"),
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_deref"),
         /*args=*/ArrayAttr{},
         /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{refOp.getResult()});
+        /*operands=*/ArrayRef<Value>{listRefOp.getResult()});
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_LIST_NULL"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                             emitc::OpaqueAttr::get(ctx, "local_refs")}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{listDerefOp.getResult(0)});
-
-    auto funcOp = getOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
+    auto funcOp = getOp.getOperation()->getParentOfType<mlir::FuncOp>();
 
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
@@ -1084,17 +1647,30 @@
     int32_t ordinal =
         registerAllocation.mapToRegister(getOp.getResult()).ordinal();
 
-    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::CallOp>(
+    emitc::ConstantOp refOp = nullptr;
+    for (auto constantOp : funcOp.getOps<emitc::ConstantOp>()) {
+      Operation *op = constantOp.getOperation();
+      if (!op->hasAttr("ref_ordinal")) continue;
+      if (op->getAttr("ref_ordinal")
+              .cast<IntegerAttr>()
+              .getValue()
+              .getZExtValue() == ordinal) {
+        refOp = constantOp;
+        break;
+      }
+    }
+
+    if (!refOp)
+      return getOp.emitError()
+             << "Corresponding ref for ordinal '" << ordinal << "' not found";
+
+    auto refPtrOp = rewriter.replaceOpWithNewOp<emitc::ApplyOp>(
         /*op=*/getOp,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_t*"),
-        /*callee=*/StringAttr::get(ctx, "VM_ARRAY_ELEMENT_ADDRESS"),
-        /*args=*/
-        ArrayAttr::get(ctx, {emitc::OpaqueAttr::get(ctx, "local_refs"),
-                             rewriter.getI32IntegerAttr(ordinal)}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{});
+        /*applicableOperator=*/StringAttr::get(ctx, "&"),
+        /*operand=*/refOp);
 
-    failableCall(
+    returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_get_ref_retain"),
@@ -1102,7 +1678,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/
         ArrayRef<Value>{listDerefOp.getResult(0), getOp.index(),
-                        refPtrOp.getResult(0)});
+                        refPtrOp.getResult()});
 
     Type elementType = getOp.getResult().getType();
 
@@ -1112,16 +1688,115 @@
       return failure();
     }
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_REF_RELEASE_IF_TYPE_MISMATCH"),
-        /*args=*/
-        ArrayAttr{},
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/
-        ArrayRef<Value>{refPtrOp.getResult(0),
-                        elementTypePtrOp.getValue().getResult()});
+    // Build the following expression:
+    // (ref->type != IREE_VM_REF_TYPE_NULL &&
+    // (iree_vm_type_def_is_value(type_def) || ref->type != type_def->ref_type))
+    emitc::CallOp invalidType;
+    {
+      auto refType = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/
+          emitc::OpaqueType::get(ctx, "iree_vm_ref_type_t"),
+          /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+          /*args=*/
+          ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                               emitc::OpaqueAttr::get(ctx, "type")}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{refPtrOp.getResult()});
+
+      auto refTypeNull = rewriter.create<emitc::ConstantOp>(
+          /*location=*/loc,
+          /*resultType=*/emitc::OpaqueType::get(ctx, "iree_vm_ref_type_t"),
+          /*value=*/emitc::OpaqueAttr::get(ctx, "IREE_VM_REF_TYPE_NULL"));
+
+      auto typedefIsValue = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/rewriter.getIntegerType(1),
+          /*callee=*/StringAttr::get(ctx, "iree_vm_type_def_is_value"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{elementTypePtrOp.getValue().getResult()});
+
+      auto typedefRefType = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/
+          emitc::OpaqueType::get(ctx, "iree_vm_ref_type_t"),
+          /*callee=*/StringAttr::get(ctx, "EMITC_STRUCT_PTR_MEMBER"),
+          /*args=*/
+          ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
+                               emitc::OpaqueAttr::get(ctx, "ref_type")}),
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{elementTypePtrOp.getValue().getResult()});
+
+      auto refTypeIsNotNull = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/rewriter.getIntegerType(1),
+          /*callee=*/StringAttr::get(ctx, "EMITC_NE"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{refType.getResult(0), refTypeNull.getResult()});
+
+      auto refTypesDontMatch = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/rewriter.getIntegerType(1),
+          /*callee=*/StringAttr::get(ctx, "EMITC_NE"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{refType.getResult(0), typedefRefType.getResult(0)});
+
+      auto invalidRefType = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/rewriter.getIntegerType(1),
+          /*callee=*/StringAttr::get(ctx, "EMITC_OR"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{typedefIsValue.getResult(0),
+                          refTypesDontMatch.getResult(0)});
+
+      invalidType = rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/rewriter.getIntegerType(1),
+          /*callee=*/StringAttr::get(ctx, "EMITC_AND"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/
+          ArrayRef<Value>{refTypeIsNotNull.getResult(0),
+                          invalidRefType.getResult(0)});
+    }
+
+    // Start by splitting the block into two. The part before will contain the
+    // condition, and the part after will contain the continuation point.
+    Block *condBlock = rewriter.getInsertionBlock();
+    Block::iterator opPosition = rewriter.getInsertionPoint();
+    Block *continuationBlock = rewriter.splitBlock(condBlock, opPosition);
+
+    // Create a new block for the target of the failure.
+    Block *failureBlock;
+    {
+      OpBuilder::InsertionGuard guard(rewriter);
+      Region *parentRegion = condBlock->getParent();
+      failureBlock = rewriter.createBlock(parentRegion, parentRegion->end());
+
+      rewriter.create<emitc::CallOp>(
+          /*location=*/loc,
+          /*type=*/TypeRange{},
+          /*callee=*/StringAttr::get(ctx, "iree_vm_ref_release"),
+          /*args=*/ArrayAttr{},
+          /*templateArgs=*/ArrayAttr{},
+          /*operands=*/ArrayRef<Value>{refPtrOp.getResult()});
+
+      rewriter.create<mlir::BranchOp>(loc, continuationBlock);
+    }
+
+    rewriter.setInsertionPointToEnd(condBlock);
+    auto branchOp = rewriter.create<CondBranchOp>(
+        loc, invalidType.getResult(0), failureBlock, continuationBlock);
 
     return success();
   }
@@ -1172,7 +1847,8 @@
         /*applicableOperator=*/StringAttr::get(ctx, "*"),
         /*operand=*/operands[0]);
 
-    auto listDerefOp = rewriter.create<emitc::CallOp>(
+    auto listDerefOp = failListNull(
+        /*rewriter=*/rewriter,
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"),
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_deref"),
@@ -1180,17 +1856,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{refOp.getResult()});
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_LIST_NULL"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                             emitc::OpaqueAttr::get(ctx, "local_refs")}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{listDerefOp.getResult(0)});
-
-    auto callOp = failableCall(
+    auto callOp = returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_set_value"),
@@ -1228,7 +1894,8 @@
         /*applicableOperator=*/StringAttr::get(ctx, "*"),
         /*operand=*/operands[0]);
 
-    auto listDerefOp = rewriter.create<emitc::CallOp>(
+    auto listDerefOp = failListNull(
+        /*rewriter=*/rewriter,
         /*location=*/loc,
         /*type=*/emitc::OpaqueType::get(ctx, "iree_vm_list_t*"),
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_deref"),
@@ -1236,17 +1903,7 @@
         /*templateArgs=*/ArrayAttr{},
         /*operands=*/ArrayRef<Value>{refOp.getResult()});
 
-    rewriter.create<emitc::CallOp>(
-        /*location=*/loc,
-        /*type=*/TypeRange{},
-        /*callee=*/StringAttr::get(ctx, "VM_RETURN_IF_LIST_NULL"),
-        /*args=*/
-        ArrayAttr::get(ctx, {rewriter.getIndexAttr(0),
-                             emitc::OpaqueAttr::get(ctx, "local_refs")}),
-        /*templateArgs=*/ArrayAttr{},
-        /*operands=*/ArrayRef<Value>{listDerefOp.getResult(0)});
-
-    auto funcOp = setOp.getOperation()->getParentOfType<IREE::VM::FuncOp>();
+    auto funcOp = setOp.getOperation()->getParentOfType<mlir::FuncOp>();
     auto ptr = vmAnalysisCache.find(funcOp.getOperation());
     if (ptr == vmAnalysisCache.end()) {
       return setOp.emitError() << "parent func op not found in cache.";
@@ -1256,7 +1913,7 @@
     bool move =
         valueLiveness.isLastValueUse(setOp.value(), setOp.getOperation());
 
-    auto callOp = failableCall(
+    auto callOp = returnIfError(
         /*rewriter=*/rewriter,
         /*location=*/loc,
         /*callee=*/StringAttr::get(ctx, "iree_vm_list_set_ref_retain"),
@@ -1279,8 +1936,13 @@
                                OwningRewritePatternList &patterns,
                                VMAnalysisCache &vmAnalysisCache) {
   populatePreserveCompilerHintsPatterns(context, patterns);
-  // Calls
-  patterns.insert<VMCallOpConversion>(context);
+
+  // CFG
+  patterns.insert<BranchOpConversion>(context);
+  patterns.insert<CallOpConversion>(context);
+  patterns.insert<CondBranchOpConversion>(context);
+  patterns.insert<FailOpConversion>(context);
+  patterns.insert<ReturnOpConversion>(context);
 
   // Globals
   patterns.insert<
@@ -1311,59 +1973,67 @@
   patterns.insert<ListSetRefOpConversion>(context, vmAnalysisCache);
 
   // Conditional assignment ops
-  patterns.insert<CallOpConversion<IREE::VM::SelectI32Op>>(context,
-                                                           "vm_select_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::SelectI32Op>>(context,
+                                                              "vm_select_i32");
 
   // Native integer arithmetic ops
-  patterns.insert<CallOpConversion<IREE::VM::AddI32Op>>(context, "vm_add_i32");
-  patterns.insert<CallOpConversion<IREE::VM::SubI32Op>>(context, "vm_sub_i32");
-  patterns.insert<CallOpConversion<IREE::VM::MulI32Op>>(context, "vm_mul_i32");
-  patterns.insert<CallOpConversion<IREE::VM::DivI32SOp>>(context,
-                                                         "vm_div_i32s");
-  patterns.insert<CallOpConversion<IREE::VM::DivI32UOp>>(context,
-                                                         "vm_div_i32u");
-  patterns.insert<CallOpConversion<IREE::VM::RemI32SOp>>(context,
-                                                         "vm_rem_i32s");
-  patterns.insert<CallOpConversion<IREE::VM::RemI32UOp>>(context,
-                                                         "vm_rem_i32u");
-  patterns.insert<CallOpConversion<IREE::VM::FMAI32Op>>(context, "vm_fma_i32");
-  patterns.insert<CallOpConversion<IREE::VM::NotI32Op>>(context, "vm_not_i32");
-  patterns.insert<CallOpConversion<IREE::VM::AndI32Op>>(context, "vm_and_i32");
-  patterns.insert<CallOpConversion<IREE::VM::OrI32Op>>(context, "vm_or_i32");
-  patterns.insert<CallOpConversion<IREE::VM::XorI32Op>>(context, "vm_xor_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::AddI32Op>>(context,
+                                                           "vm_add_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::SubI32Op>>(context,
+                                                           "vm_sub_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::MulI32Op>>(context,
+                                                           "vm_mul_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::DivI32SOp>>(context,
+                                                            "vm_div_i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::DivI32UOp>>(context,
+                                                            "vm_div_i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::RemI32SOp>>(context,
+                                                            "vm_rem_i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::RemI32UOp>>(context,
+                                                            "vm_rem_i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::FMAI32Op>>(context,
+                                                           "vm_fma_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::NotI32Op>>(context,
+                                                           "vm_not_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::AndI32Op>>(context,
+                                                           "vm_and_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::OrI32Op>>(context, "vm_or_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::XorI32Op>>(context,
+                                                           "vm_xor_i32");
 
   // Casting and type conversion/emulation ops
-  patterns.insert<CallOpConversion<IREE::VM::TruncI32I8Op>>(context,
-                                                            "vm_trunc_i32i8");
-  patterns.insert<CallOpConversion<IREE::VM::TruncI32I16Op>>(context,
-                                                             "vm_trunc_i32i16");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI8I32SOp>>(context,
-                                                           "vm_ext_i8i32s");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI8I32UOp>>(context,
-                                                           "vm_ext_i8i32u");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI16I32SOp>>(context,
-                                                            "vm_ext_i16i32s");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI16I32UOp>>(context,
-                                                            "vm_ext_i16i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::TruncI32I8Op>>(
+      context, "vm_trunc_i32i8");
+  patterns.insert<GenericOpConversion<IREE::VM::TruncI32I16Op>>(
+      context, "vm_trunc_i32i16");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI8I32SOp>>(context,
+                                                              "vm_ext_i8i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI8I32UOp>>(context,
+                                                              "vm_ext_i8i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI16I32SOp>>(
+      context, "vm_ext_i16i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI16I32UOp>>(
+      context, "vm_ext_i16i32u");
 
   // Native bitwise shift and rotate ops
-  patterns.insert<CallOpConversion<IREE::VM::ShlI32Op>>(context, "vm_shl_i32");
-  patterns.insert<CallOpConversion<IREE::VM::ShrI32SOp>>(context,
-                                                         "vm_shr_i32s");
-  patterns.insert<CallOpConversion<IREE::VM::ShrI32UOp>>(context,
-                                                         "vm_shr_i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::ShlI32Op>>(context,
+                                                           "vm_shl_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::ShrI32SOp>>(context,
+                                                            "vm_shr_i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::ShrI32UOp>>(context,
+                                                            "vm_shr_i32u");
 
   // Comparison ops
-  patterns.insert<CallOpConversion<IREE::VM::CmpEQI32Op>>(context,
-                                                          "vm_cmp_eq_i32");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNEI32Op>>(context,
-                                                          "vm_cmp_ne_i32");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTI32SOp>>(context,
-                                                           "vm_cmp_lt_i32s");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTI32UOp>>(context,
-                                                           "vm_cmp_lt_i32u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNZI32Op>>(context,
-                                                          "vm_cmp_nz_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpEQI32Op>>(context,
+                                                             "vm_cmp_eq_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNEI32Op>>(context,
+                                                             "vm_cmp_ne_i32");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTI32SOp>>(context,
+                                                              "vm_cmp_lt_i32s");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTI32UOp>>(context,
+                                                              "vm_cmp_lt_i32u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNZI32Op>>(context,
+                                                             "vm_cmp_nz_i32");
   patterns.insert<CompareRefOpConversion<IREE::VM::CmpEQRefOp>>(
       context, "vm_cmp_eq_ref", vmAnalysisCache);
   patterns.insert<CompareRefOpConversion<IREE::VM::CmpNERefOp>>(
@@ -1383,78 +2053,91 @@
   patterns.insert<ConstZeroOpConversion<IREE::VM::ConstF32ZeroOp>>(context);
 
   // ExtF32: Conditional assignment
-  patterns.insert<CallOpConversion<IREE::VM::SelectF32Op>>(context,
-                                                           "vm_select_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::SelectF32Op>>(context,
+                                                              "vm_select_f32");
 
   // ExtF32: Native floating-point arithmetic
-  patterns.insert<CallOpConversion<IREE::VM::AddF32Op>>(context, "vm_add_f32");
-  patterns.insert<CallOpConversion<IREE::VM::SubF32Op>>(context, "vm_sub_f32");
-  patterns.insert<CallOpConversion<IREE::VM::MulF32Op>>(context, "vm_mul_f32");
-  patterns.insert<CallOpConversion<IREE::VM::DivF32Op>>(context, "vm_div_f32");
-  patterns.insert<CallOpConversion<IREE::VM::RemF32Op>>(context, "vm_rem_f32");
-  patterns.insert<CallOpConversion<IREE::VM::FMAF32Op>>(context, "vm_fma_f32");
-  patterns.insert<CallOpConversion<IREE::VM::AbsF32Op>>(context, "vm_abs_f32");
-  patterns.insert<CallOpConversion<IREE::VM::NegF32Op>>(context, "vm_neg_f32");
-  patterns.insert<CallOpConversion<IREE::VM::CeilF32Op>>(context,
-                                                         "vm_ceil_f32");
-  patterns.insert<CallOpConversion<IREE::VM::FloorF32Op>>(context,
-                                                          "vm_floor_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::AddF32Op>>(context,
+                                                           "vm_add_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::SubF32Op>>(context,
+                                                           "vm_sub_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::MulF32Op>>(context,
+                                                           "vm_mul_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::DivF32Op>>(context,
+                                                           "vm_div_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::RemF32Op>>(context,
+                                                           "vm_rem_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::FMAF32Op>>(context,
+                                                           "vm_fma_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::AbsF32Op>>(context,
+                                                           "vm_abs_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::NegF32Op>>(context,
+                                                           "vm_neg_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::CeilF32Op>>(context,
+                                                            "vm_ceil_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::FloorF32Op>>(context,
+                                                             "vm_floor_f32");
 
-  patterns.insert<CallOpConversion<IREE::VM::AtanF32Op>>(context,
-                                                         "vm_atan_f32");
-  patterns.insert<CallOpConversion<IREE::VM::Atan2F32Op>>(context,
-                                                          "vm_atan2_f32");
-  patterns.insert<CallOpConversion<IREE::VM::CosF32Op>>(context, "vm_cos_f32");
-  patterns.insert<CallOpConversion<IREE::VM::SinF32Op>>(context, "vm_sin_f32");
-  patterns.insert<CallOpConversion<IREE::VM::ExpF32Op>>(context, "vm_exp_f32");
-  patterns.insert<CallOpConversion<IREE::VM::Exp2F32Op>>(context,
-                                                         "vm_exp2_f32");
-  patterns.insert<CallOpConversion<IREE::VM::ExpM1F32Op>>(context,
-                                                          "vm_expm1_f32");
-  patterns.insert<CallOpConversion<IREE::VM::LogF32Op>>(context, "vm_log_f32");
-  patterns.insert<CallOpConversion<IREE::VM::Log10F32Op>>(context,
-                                                          "vm_log10_f32");
-  patterns.insert<CallOpConversion<IREE::VM::Log1pF32Op>>(context,
-                                                          "vm_log1p_f32");
-  patterns.insert<CallOpConversion<IREE::VM::Log2F32Op>>(context,
-                                                         "vm_log2_f32");
-  patterns.insert<CallOpConversion<IREE::VM::PowF32Op>>(context, "vm_pow_f32");
-  patterns.insert<CallOpConversion<IREE::VM::RsqrtF32Op>>(context,
-                                                          "vm_rsqrt_f32");
-  patterns.insert<CallOpConversion<IREE::VM::SqrtF32Op>>(context,
-                                                         "vm_sqrt_f32");
-  patterns.insert<CallOpConversion<IREE::VM::TanhF32Op>>(context,
-                                                         "vm_tanh_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::AtanF32Op>>(context,
+                                                            "vm_atan_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::Atan2F32Op>>(context,
+                                                             "vm_atan2_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::CosF32Op>>(context,
+                                                           "vm_cos_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::SinF32Op>>(context,
+                                                           "vm_sin_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::ExpF32Op>>(context,
+                                                           "vm_exp_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::Exp2F32Op>>(context,
+                                                            "vm_exp2_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::ExpM1F32Op>>(context,
+                                                             "vm_expm1_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::LogF32Op>>(context,
+                                                           "vm_log_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::Log10F32Op>>(context,
+                                                             "vm_log10_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::Log1pF32Op>>(context,
+                                                             "vm_log1p_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::Log2F32Op>>(context,
+                                                            "vm_log2_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::PowF32Op>>(context,
+                                                           "vm_pow_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::RsqrtF32Op>>(context,
+                                                             "vm_rsqrt_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::SqrtF32Op>>(context,
+                                                            "vm_sqrt_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::TanhF32Op>>(context,
+                                                            "vm_tanh_f32");
 
   // ExtF32: Casting and type conversion/emulation
-  patterns.insert<CallOpConversion<IREE::VM::CastSI32F32Op>>(context,
-                                                             "vm_cast_si32f32");
-  patterns.insert<CallOpConversion<IREE::VM::CastUI32F32Op>>(context,
-                                                             "vm_cast_ui32f32");
-  patterns.insert<CallOpConversion<IREE::VM::CastF32SI32Op>>(context,
-                                                             "vm_cast_f32si32");
-  patterns.insert<CallOpConversion<IREE::VM::CastF32UI32Op>>(context,
-                                                             "vm_cast_f32ui32");
+  patterns.insert<GenericOpConversion<IREE::VM::CastSI32F32Op>>(
+      context, "vm_cast_si32f32");
+  patterns.insert<GenericOpConversion<IREE::VM::CastUI32F32Op>>(
+      context, "vm_cast_ui32f32");
+  patterns.insert<GenericOpConversion<IREE::VM::CastF32SI32Op>>(
+      context, "vm_cast_f32si32");
+  patterns.insert<GenericOpConversion<IREE::VM::CastF32UI32Op>>(
+      context, "vm_cast_f32ui32");
 
   // ExtF32: Comparison ops
-  patterns.insert<CallOpConversion<IREE::VM::CmpEQF32OOp>>(context,
-                                                           "vm_cmp_eq_f32o");
-  patterns.insert<CallOpConversion<IREE::VM::CmpEQF32UOp>>(context,
-                                                           "vm_cmp_eq_f32u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNEF32OOp>>(context,
-                                                           "vm_cmp_ne_f32o");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNEF32UOp>>(context,
-                                                           "vm_cmp_ne_f32u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTF32OOp>>(context,
-                                                           "vm_cmp_lt_f32o");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTF32UOp>>(context,
-                                                           "vm_cmp_lt_f32u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTEF32OOp>>(context,
-                                                            "vm_cmp_lte_f32o");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTEF32UOp>>(context,
-                                                            "vm_cmp_lte_f32u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNaNF32Op>>(context,
-                                                           "vm_cmp_nan_f32");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpEQF32OOp>>(context,
+                                                              "vm_cmp_eq_f32o");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpEQF32UOp>>(context,
+                                                              "vm_cmp_eq_f32u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNEF32OOp>>(context,
+                                                              "vm_cmp_ne_f32o");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNEF32UOp>>(context,
+                                                              "vm_cmp_ne_f32u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTF32OOp>>(context,
+                                                              "vm_cmp_lt_f32o");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTF32UOp>>(context,
+                                                              "vm_cmp_lt_f32u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTEF32OOp>>(
+      context, "vm_cmp_lte_f32o");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTEF32UOp>>(
+      context, "vm_cmp_lte_f32u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNaNF32Op>>(context,
+                                                              "vm_cmp_nan_f32");
 
   // ExtI64: Globals
   patterns.insert<
@@ -1473,52 +2156,60 @@
   patterns.insert<ListSetOpConversion<IREE::VM::ListSetI64Op>>(context);
 
   // ExtI64: Conditional assignment ops
-  patterns.insert<CallOpConversion<IREE::VM::SelectI64Op>>(context,
-                                                           "vm_select_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::SelectI64Op>>(context,
+                                                              "vm_select_i64");
   // ExtI64: Native integer arithmetic ops
-  patterns.insert<CallOpConversion<IREE::VM::AddI64Op>>(context, "vm_add_i64");
-  patterns.insert<CallOpConversion<IREE::VM::SubI64Op>>(context, "vm_sub_i64");
-  patterns.insert<CallOpConversion<IREE::VM::MulI64Op>>(context, "vm_mul_i64");
-  patterns.insert<CallOpConversion<IREE::VM::DivI64SOp>>(context,
-                                                         "vm_div_i64s");
-  patterns.insert<CallOpConversion<IREE::VM::DivI64UOp>>(context,
-                                                         "vm_div_i64u");
-  patterns.insert<CallOpConversion<IREE::VM::RemI64SOp>>(context,
-                                                         "vm_rem_i64s");
-  patterns.insert<CallOpConversion<IREE::VM::RemI64UOp>>(context,
-                                                         "vm_rem_i64u");
-  patterns.insert<CallOpConversion<IREE::VM::FMAI64Op>>(context, "vm_fma_i64");
-  patterns.insert<CallOpConversion<IREE::VM::NotI64Op>>(context, "vm_not_i64");
-  patterns.insert<CallOpConversion<IREE::VM::AndI64Op>>(context, "vm_and_i64");
-  patterns.insert<CallOpConversion<IREE::VM::OrI64Op>>(context, "vm_or_i64");
-  patterns.insert<CallOpConversion<IREE::VM::XorI64Op>>(context, "vm_xor_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::AddI64Op>>(context,
+                                                           "vm_add_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::SubI64Op>>(context,
+                                                           "vm_sub_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::MulI64Op>>(context,
+                                                           "vm_mul_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::DivI64SOp>>(context,
+                                                            "vm_div_i64s");
+  patterns.insert<GenericOpConversion<IREE::VM::DivI64UOp>>(context,
+                                                            "vm_div_i64u");
+  patterns.insert<GenericOpConversion<IREE::VM::RemI64SOp>>(context,
+                                                            "vm_rem_i64s");
+  patterns.insert<GenericOpConversion<IREE::VM::RemI64UOp>>(context,
+                                                            "vm_rem_i64u");
+  patterns.insert<GenericOpConversion<IREE::VM::FMAI64Op>>(context,
+                                                           "vm_fma_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::NotI64Op>>(context,
+                                                           "vm_not_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::AndI64Op>>(context,
+                                                           "vm_and_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::OrI64Op>>(context, "vm_or_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::XorI64Op>>(context,
+                                                           "vm_xor_i64");
 
   // ExtI64: Casting and type conversion/emulation ops
-  patterns.insert<CallOpConversion<IREE::VM::TruncI64I32Op>>(context,
-                                                             "vm_trunc_i64i32");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI32I64SOp>>(context,
-                                                            "vm_ext_i32i64s");
-  patterns.insert<CallOpConversion<IREE::VM::ExtI32I64UOp>>(context,
-                                                            "vm_ext_i32i64u");
+  patterns.insert<GenericOpConversion<IREE::VM::TruncI64I32Op>>(
+      context, "vm_trunc_i64i32");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI32I64SOp>>(
+      context, "vm_ext_i32i64s");
+  patterns.insert<GenericOpConversion<IREE::VM::ExtI32I64UOp>>(
+      context, "vm_ext_i32i64u");
 
   // ExtI64: Native bitwise shift and rotate ops
-  patterns.insert<CallOpConversion<IREE::VM::ShlI64Op>>(context, "vm_shl_i64");
-  patterns.insert<CallOpConversion<IREE::VM::ShrI64SOp>>(context,
-                                                         "vm_shr_i64s");
-  patterns.insert<CallOpConversion<IREE::VM::ShrI64UOp>>(context,
-                                                         "vm_shr_i64u");
+  patterns.insert<GenericOpConversion<IREE::VM::ShlI64Op>>(context,
+                                                           "vm_shl_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::ShrI64SOp>>(context,
+                                                            "vm_shr_i64s");
+  patterns.insert<GenericOpConversion<IREE::VM::ShrI64UOp>>(context,
+                                                            "vm_shr_i64u");
 
   // ExtI64: Comparison ops
-  patterns.insert<CallOpConversion<IREE::VM::CmpEQI64Op>>(context,
-                                                          "vm_cmp_eq_i64");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNEI64Op>>(context,
-                                                          "vm_cmp_ne_i64");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTI64SOp>>(context,
-                                                           "vm_cmp_lt_i64s");
-  patterns.insert<CallOpConversion<IREE::VM::CmpLTI64UOp>>(context,
-                                                           "vm_cmp_lt_i64u");
-  patterns.insert<CallOpConversion<IREE::VM::CmpNZI64Op>>(context,
-                                                          "vm_cmp_nz_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpEQI64Op>>(context,
+                                                             "vm_cmp_eq_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNEI64Op>>(context,
+                                                             "vm_cmp_ne_i64");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTI64SOp>>(context,
+                                                              "vm_cmp_lt_i64s");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpLTI64UOp>>(context,
+                                                              "vm_cmp_lt_i64u");
+  patterns.insert<GenericOpConversion<IREE::VM::CmpNZI64Op>>(context,
+                                                             "vm_cmp_nz_i64");
 }
 
 namespace IREE {
@@ -1531,7 +2222,8 @@
     : public PassWrapper<ConvertVMToEmitCPass,
                          OperationPass<IREE::VM::ModuleOp>> {
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<mlir::emitc::EmitCDialect, IREEDialect>();
+    registry.insert<mlir::emitc::EmitCDialect, mlir::BuiltinDialect,
+                    mlir::StandardOpsDialect, IREEDialect>();
   }
 
   StringRef getArgument() const override { return "iree-convert-vm-to-emitc"; }
@@ -1541,22 +2233,36 @@
   }
 
   void runOnOperation() override {
+    IREE::VM::ModuleOp module = getOperation();
+
     ConversionTarget target(getContext());
     EmitCTypeConverter typeConverter;
 
+    // Run analysis passes
     VMAnalysisCache vmAnalysisCache;
 
-    for (auto funcOp : getOperation().getOps<IREE::VM::FuncOp>()) {
+    // Convert vm.func ops to std.func with the calling convention EmitC uses.
+    // We convert these upfront to make sure vm.call ops always reference
+    // std.func ops with the correct calling convention during the conversion.
+    SmallVector<IREE::VM::FuncOp, 4> funcsToRemove;
+    for (auto funcOp : module.getOps<IREE::VM::FuncOp>()) {
       Operation *op = funcOp.getOperation();
       vmAnalysisCache.insert(std::make_pair(
           op, VMAnalysis{RegisterAllocation(op), ValueLiveness(op)}));
+
+      if (failed(convertFuncOp(funcOp, vmAnalysisCache)))
+        return signalPassFailure();
+      funcsToRemove.push_back(funcOp);
     }
 
+    for (auto &funcOp : funcsToRemove) funcOp.erase();
+
     OwningRewritePatternList patterns(&getContext());
     populateVMToEmitCPatterns(&getContext(), typeConverter, patterns,
                               vmAnalysisCache);
 
-    target.addLegalDialect<mlir::emitc::EmitCDialect>();
+    target.addLegalDialect<mlir::emitc::EmitCDialect, mlir::BuiltinDialect,
+                           mlir::StandardOpsDialect>();
 
     target.addDynamicallyLegalOp<IREE::DoNotOptimizeOp>(
         [&](IREE::DoNotOptimizeOp op) {
@@ -1566,7 +2272,6 @@
     // Structural ops
     target.addLegalOp<IREE::VM::ModuleOp>();
     target.addLegalOp<IREE::VM::ModuleTerminatorOp>();
-    target.addLegalOp<IREE::VM::FuncOp>();
     target.addLegalOp<IREE::VM::ExportOp>();
     target.addLegalOp<IREE::VM::ImportOp>();
 
@@ -1576,17 +2281,7 @@
     target.addLegalOp<IREE::VM::GlobalF32Op>();
     target.addLegalOp<IREE::VM::RodataOp>();
 
-    // Control flow ops
-    target.addLegalOp<IREE::VM::BranchOp>();
-    target.addLegalOp<IREE::VM::CondBranchOp>();
-    // Note: We translate the fail op to two function calls in the
-    // end, but we can't simply convert it here because it is a
-    // terminator and an EmitC call is not.
-    target.addLegalOp<IREE::VM::FailOp>();
-    target.addLegalOp<IREE::VM::ReturnOp>();
-
-    if (failed(
-            applyFullConversion(getOperation(), target, std::move(patterns)))) {
+    if (failed(applyFullConversion(module, target, std::move(patterns)))) {
       return signalPassFailure();
     }
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
index 01630cd..47d5f25 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h
@@ -19,6 +19,11 @@
 struct VMAnalysis {
   RegisterAllocation registerAllocation;
   ValueLiveness valueLiveness;
+
+  VMAnalysis(VMAnalysis &&) = default;
+  VMAnalysis &operator=(VMAnalysis &&) = default;
+  VMAnalysis(const VMAnalysis &) = delete;
+  VMAnalysis &operator=(const VMAnalysis &) = delete;
 };
 
 using VMAnalysisCache = DenseMap<Operation *, VMAnalysis>;
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops.mlir
index 7c068c3..4ff0777 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: @add_i32
+// CHECK-LABEL: @my_module_add_i32
 vm.module @my_module {
   vm.func @add_i32(%arg0: i32, %arg1: i32) {
-    // CHECK-NEXT: %0 = emitc.call "vm_add_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_add_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.add.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -11,10 +11,10 @@
 
 // -----
 
-// CHECK-LABEL: @sub_i32
+// CHECK-LABEL: @my_module_sub_i32
 vm.module @my_module {
   vm.func @sub_i32(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_sub_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_sub_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.sub.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -22,10 +22,10 @@
 
 // -----
 
-// CHECK-LABEL: @mul_i32
+// CHECK-LABEL: @my_module_mul_i32
 vm.module @my_module {
   vm.func @mul_i32(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_mul_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_mul_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.mul.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -33,10 +33,10 @@
 
 // -----
 
-// CHECK-LABEL: @div_i32_s
+// CHECK-LABEL: @my_module_div_i32_s
 vm.module @my_module {
   vm.func @div_i32_s(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_div_i32s"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_div_i32s"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.div.i32.s %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -44,10 +44,10 @@
 
 // -----
 
-// CHECK-LABEL: @div_i32_u
+// CHECK-LABEL: @my_module_div_i32_u
 vm.module @my_module {
   vm.func @div_i32_u(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_div_i32u"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_div_i32u"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.div.i32.u %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -55,10 +55,10 @@
 
 // -----
 
-// CHECK-LABEL: @rem_i32_s
+// CHECK-LABEL: @my_module_rem_i32_s
 vm.module @my_module {
   vm.func @rem_i32_s(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_rem_i32s"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_rem_i32s"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.rem.i32.s %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -66,10 +66,10 @@
 
 // -----
 
-// CHECK-LABEL: @rem_i32_u
+// CHECK-LABEL: @my_module_rem_i32_u
 vm.module @my_module {
   vm.func @rem_i32_u(%arg0: i32, %arg1: i32) {
-    // CHECK: %0 = emitc.call "vm_rem_i32u"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_rem_i32u"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.rem.i32.u %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -77,10 +77,10 @@
 
 // -----
 
-// CHECK-LABEL: @fma_i32
+// CHECK-LABEL: @my_module_fma_i32
 vm.module @my_module {
   vm.func @fma_i32(%arg0: i32, %arg1: i32, %arg2: i32) {
-    // CHECK: %0 = emitc.call "vm_fma_i32"(%arg0, %arg1, %arg2) : (i32, i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_fma_i32"(%arg3, %arg4, %arg5) : (i32, i32, i32) -> i32
     %0 = vm.fma.i32 %arg0, %arg1, %arg2 : i32
     vm.return %0 : i32
   }
@@ -88,10 +88,10 @@
 
 // -----
 
-// CHECK-LABEL: @not_i32
+// CHECK-LABEL: @my_module_not_i32
 vm.module @my_module {
   vm.func @not_i32(%arg0 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_not_i32"(%arg0) : (i32) -> i32
+    // CHECK: %0 = emitc.call "vm_not_i32"(%arg3) : (i32) -> i32
     %0 = vm.not.i32 %arg0 : i32
     vm.return %0 : i32
   }
@@ -99,10 +99,10 @@
 
 // -----
 
-// CHECK-LABEL: @and_i32
+// CHECK-LABEL: @my_module_and_i32
 vm.module @my_module {
   vm.func @and_i32(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_and_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_and_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.and.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -110,10 +110,10 @@
 
 // -----
 
-// CHECK-LABEL: @or_i32
+// CHECK-LABEL: @my_module_or_i32
 vm.module @my_module {
   vm.func @or_i32(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_or_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_or_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.or.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -121,10 +121,10 @@
 
 // -----
 
-// CHECK-LABEL: @xor_i32
+// CHECK-LABEL: @my_module_xor_i32
 vm.module @my_module {
   vm.func @xor_i32(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_xor_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_xor_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.xor.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_f32.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_f32.mlir
index f880972..bc9e4f6 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_f32.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_f32.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: @add_f32
+// CHECK-LABEL: @my_module_add_f32
 vm.module @my_module {
   vm.func @add_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_add_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_add_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.add.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -11,10 +11,10 @@
 
 // -----
 
-// CHECK-LABEL: @sub_f32
+// CHECK-LABEL: @my_module_sub_f32
 vm.module @my_module {
   vm.func @sub_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_sub_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_sub_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.sub.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -22,10 +22,10 @@
 
 // -----
 
-// CHECK-LABEL: @mul_f32
+// CHECK-LABEL: @my_module_mul_f32
 vm.module @my_module {
   vm.func @mul_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_mul_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_mul_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.mul.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -33,10 +33,10 @@
 
 // -----
 
-// CHECK-LABEL: @div_f32
+// CHECK-LABEL: @my_module_div_f32
 vm.module @my_module {
   vm.func @div_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_div_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_div_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.div.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -44,10 +44,10 @@
 
 // -----
 
-// CHECK-LABEL: @rem_f32
+// CHECK-LABEL: @my_module_rem_f32
 vm.module @my_module {
   vm.func @rem_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_rem_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_rem_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.rem.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -55,10 +55,10 @@
 
 // -----
 
-// CHECK-LABEL: @fma_f32
+// CHECK-LABEL: @my_module_fma_f32
 vm.module @my_module {
   vm.func @fma_f32(%arg0: f32, %arg1: f32, %arg2: f32) {
-    // CHECK: %0 = emitc.call "vm_fma_f32"(%arg0, %arg1, %arg2) : (f32, f32, f32) -> f32
+    // CHECK: %0 = emitc.call "vm_fma_f32"(%arg3, %arg4, %arg5) : (f32, f32, f32) -> f32
     %0 = vm.fma.f32 %arg0, %arg1, %arg2 : f32
     vm.return %0 : f32
   }
@@ -66,10 +66,10 @@
 
 // -----
 
-// CHECK-LABEL: @abs_f32
+// CHECK-LABEL: @my_module_abs_f32
 vm.module @my_module {
   vm.func @abs_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_abs_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_abs_f32"(%arg3) : (f32) -> f32
     %0 = vm.abs.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -77,10 +77,10 @@
 
 // -----
 
-// CHECK-LABEL: @neg_f32
+// CHECK-LABEL: @my_module_neg_f32
 vm.module @my_module {
   vm.func @neg_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_neg_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_neg_f32"(%arg3) : (f32) -> f32
     %0 = vm.neg.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -88,10 +88,10 @@
 
 // -----
 
-// CHECK-LABEL: @ceil_f32
+// CHECK-LABEL: @my_module_ceil_f32
 vm.module @my_module {
   vm.func @ceil_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_ceil_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_ceil_f32"(%arg3) : (f32) -> f32
     %0 = vm.ceil.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -99,10 +99,10 @@
 
 // -----
 
-// CHECK-LABEL: @floor_f32
+// CHECK-LABEL: @my_module_floor_f32
 vm.module @my_module {
   vm.func @floor_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_floor_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_floor_f32"(%arg3) : (f32) -> f32
     %0 = vm.floor.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -110,10 +110,10 @@
 
 // -----
 
-// CHECK-LABEL: @atan_f32
+// CHECK-LABEL: @my_module_atan_f32
 vm.module @my_module {
   vm.func @atan_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_atan_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_atan_f32"(%arg3) : (f32) -> f32
     %0 = vm.atan.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -121,10 +121,10 @@
 
 // -----
 
-// CHECK-LABEL: @atan2_f32
+// CHECK-LABEL: @my_module_atan2_f32
 vm.module @my_module {
   vm.func @atan2_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_atan2_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_atan2_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.atan2.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -132,10 +132,10 @@
 
 // -----
 
-// CHECK-LABEL: @cos_f32
+// CHECK-LABEL: @my_module_cos_f32
 vm.module @my_module {
   vm.func @cos_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cos_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_cos_f32"(%arg3) : (f32) -> f32
     %0 = vm.cos.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -143,10 +143,10 @@
 
 // -----
 
-// CHECK-LABEL: @sin_f32
+// CHECK-LABEL: @my_module_sin_f32
 vm.module @my_module {
   vm.func @sin_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_sin_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_sin_f32"(%arg3) : (f32) -> f32
     %0 = vm.sin.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -154,10 +154,10 @@
 
 // -----
 
-// CHECK-LABEL: @exp_f32
+// CHECK-LABEL: @my_module_exp_f32
 vm.module @my_module {
   vm.func @exp_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_exp_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_exp_f32"(%arg3) : (f32) -> f32
     %0 = vm.exp.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -165,10 +165,10 @@
 
 // -----
 
-// CHECK-LABEL: @exp2_f32
+// CHECK-LABEL: @my_module_exp2_f32
 vm.module @my_module {
   vm.func @exp2_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_exp2_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_exp2_f32"(%arg3) : (f32) -> f32
     %0 = vm.exp2.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -176,10 +176,10 @@
 
 // -----
 
-// CHECK-LABEL: @expm1_f32
+// CHECK-LABEL: @my_module_expm1_f32
 vm.module @my_module {
   vm.func @expm1_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_expm1_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_expm1_f32"(%arg3) : (f32) -> f32
     %0 = vm.expm1.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -187,10 +187,10 @@
 
 // -----
 
-// CHECK-LABEL: @log_f32
+// CHECK-LABEL: @my_module_log_f32
 vm.module @my_module {
   vm.func @log_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_log_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_log_f32"(%arg3) : (f32) -> f32
     %0 = vm.log.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -198,10 +198,10 @@
 
 // -----
 
-// CHECK-LABEL: @log10_f32
+// CHECK-LABEL: @my_module_log10_f32
 vm.module @my_module {
   vm.func @log10_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_log10_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_log10_f32"(%arg3) : (f32) -> f32
     %0 = vm.log10.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -209,10 +209,10 @@
 
 // -----
 
-// CHECK-LABEL: @log1p_f32
+// CHECK-LABEL: @my_module_log1p_f32
 vm.module @my_module {
   vm.func @log1p_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_log1p_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_log1p_f32"(%arg3) : (f32) -> f32
     %0 = vm.log1p.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -220,10 +220,10 @@
 
 // -----
 
-// CHECK-LABEL: @log2_f32
+// CHECK-LABEL: @my_module_log2_f32
 vm.module @my_module {
   vm.func @log2_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_log2_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_log2_f32"(%arg3) : (f32) -> f32
     %0 = vm.log2.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -231,10 +231,10 @@
 
 // -----
 
-// CHECK-LABEL: @pow_f32
+// CHECK-LABEL: @my_module_pow_f32
 vm.module @my_module {
   vm.func @pow_f32(%arg0 : f32, %arg1 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_pow_f32"(%arg0, %arg1) : (f32, f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_pow_f32"(%arg3, %arg4) : (f32, f32) -> f32
     %0 = vm.pow.f32 %arg0, %arg1 : f32
     vm.return %0 : f32
   }
@@ -242,10 +242,10 @@
 
 // -----
 
-// CHECK-LABEL: @rsqrt_f32
+// CHECK-LABEL: @my_module_rsqrt_f32
 vm.module @my_module {
   vm.func @rsqrt_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_rsqrt_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_rsqrt_f32"(%arg3) : (f32) -> f32
     %0 = vm.rsqrt.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -253,10 +253,10 @@
 
 // -----
 
-// CHECK-LABEL: @sqrt_f32
+// CHECK-LABEL: @my_module_sqrt_f32
 vm.module @my_module {
   vm.func @sqrt_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_sqrt_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_sqrt_f32"(%arg3) : (f32) -> f32
     %0 = vm.sqrt.f32 %arg0 : f32
     vm.return %0 : f32
   }
@@ -264,10 +264,10 @@
 
 // -----
 
-// CHECK-LABEL: @tanh_f32
+// CHECK-LABEL: @my_module_tanh_f32
 vm.module @my_module {
   vm.func @tanh_f32(%arg0 : f32) -> f32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_tanh_f32"(%arg0) : (f32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_tanh_f32"(%arg3) : (f32) -> f32
     %0 = vm.tanh.f32 %arg0 : f32
     vm.return %0 : f32
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_i64.mlir
index 6123ccd..4b8b0aa 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/arithmetic_ops_i64.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: @add_i64
+// CHECK-LABEL: @my_module_add_i64
 vm.module @my_module {
   vm.func @add_i64(%arg0: i64, %arg1: i64) {
-    // CHECK-NEXT: %0 = emitc.call "vm_add_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK-NEXT: %0 = emitc.call "vm_add_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.add.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -11,10 +11,10 @@
 
 // -----
 
-// CHECK-LABEL: @sub_i64
+// CHECK-LABEL: @my_module_sub_i64
 vm.module @my_module {
   vm.func @sub_i64(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_sub_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_sub_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.sub.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -22,10 +22,10 @@
 
 // -----
 
-// CHECK-LABEL: @mul_i64
+// CHECK-LABEL: @my_module_mul_i64
 vm.module @my_module {
   vm.func @mul_i64(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_mul_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_mul_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.mul.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -33,10 +33,10 @@
 
 // -----
 
-// CHECK-LABEL: @div_i64_s
+// CHECK-LABEL: @my_module_div_i64_s
 vm.module @my_module {
   vm.func @div_i64_s(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_div_i64s"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_div_i64s"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.div.i64.s %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -44,10 +44,10 @@
 
 // -----
 
-// CHECK-LABEL: @div_i64_u
+// CHECK-LABEL: @my_module_div_i64_u
 vm.module @my_module {
   vm.func @div_i64_u(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_div_i64u"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_div_i64u"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.div.i64.u %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -55,10 +55,10 @@
 
 // -----
 
-// CHECK-LABEL: @rem_i64_s
+// CHECK-LABEL: @my_module_rem_i64_s
 vm.module @my_module {
   vm.func @rem_i64_s(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_rem_i64s"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_rem_i64s"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.rem.i64.s %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -66,10 +66,10 @@
 
 // -----
 
-// CHECK-LABEL: @rem_i64_u
+// CHECK-LABEL: @my_module_rem_i64_u
 vm.module @my_module {
   vm.func @rem_i64_u(%arg0: i64, %arg1: i64) {
-    // CHECK: %0 = emitc.call "vm_rem_i64u"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_rem_i64u"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.rem.i64.u %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -77,10 +77,10 @@
 
 // -----
 
-// CHECK-LABEL: @fma_i64
+// CHECK-LABEL: @my_module_fma_i64
 vm.module @my_module {
   vm.func @fma_i64(%arg0: i64, %arg1: i64, %arg2: i64) {
-    // CHECK: %0 = emitc.call "vm_fma_i64"(%arg0, %arg1, %arg2) : (i64, i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_fma_i64"(%arg3, %arg4, %arg5) : (i64, i64, i64) -> i64
     %0 = vm.fma.i64 %arg0, %arg1, %arg2 : i64
     vm.return %0 : i64
   }
@@ -88,10 +88,10 @@
 
 // -----
 
-// CHECK-LABEL: @not_i64
+// CHECK-LABEL: @my_module_not_i64
 vm.module @my_module {
   vm.func @not_i64(%arg0 : i64) -> i64 {
-    // CHECK: %0 = emitc.call "vm_not_i64"(%arg0) : (i64) -> i64
+    // CHECK: %0 = emitc.call "vm_not_i64"(%arg3) : (i64) -> i64
     %0 = vm.not.i64 %arg0 : i64
     vm.return %0 : i64
   }
@@ -99,10 +99,10 @@
 
 // -----
 
-// CHECK-LABEL: @and_i64
+// CHECK-LABEL: @my_module_and_i64
 vm.module @my_module {
   vm.func @and_i64(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK: %0 = emitc.call "vm_and_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_and_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.and.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -110,10 +110,10 @@
 
 // -----
 
-// CHECK-LABEL: @or_i64
+// CHECK-LABEL: @my_module_or_i64
 vm.module @my_module {
   vm.func @or_i64(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK: %0 = emitc.call "vm_or_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_or_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.or.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -121,10 +121,10 @@
 
 // -----
 
-// CHECK-LABEL: @xor_i64
+// CHECK-LABEL: @my_module_xor_i64
 vm.module @my_module {
   vm.func @xor_i64(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK: %0 = emitc.call "vm_xor_i64"(%arg0, %arg1) : (i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_xor_i64"(%arg3, %arg4) : (i64, i64) -> i64
     %0 = vm.xor.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops.mlir
index 95c7c22..049b3ec 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @select_i32
+// CHECK-LABEL: @my_module_select_i32
 vm.module @my_module {
   vm.func @select_i32(%arg0 : i32, %arg1 : i32, %arg2 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_select_i32"(%arg0, %arg1, %arg2) : (i32, i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_select_i32"(%arg3, %arg4, %arg5) : (i32, i32, i32) -> i32
     %0 = vm.select.i32 %arg0, %arg1, %arg2 : i32
     vm.return %0 : i32
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_f32.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_f32.mlir
index d9b5cad..3fc3189 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_f32.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_f32.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @select_f32
+// CHECK-LABEL: @my_module_select_f32
 vm.module @my_module {
   vm.func @select_f32(%arg0 : i32, %arg1 : f32, %arg2 : f32) -> f32 {
-    // CHECK: %0 = emitc.call "vm_select_f32"(%arg0, %arg1, %arg2) : (i32, f32, f32) -> f32
+    // CHECK: %0 = emitc.call "vm_select_f32"(%arg3, %arg4, %arg5) : (i32, f32, f32) -> f32
     %0 = vm.select.f32 %arg0, %arg1, %arg2 : f32
     vm.return %0 : f32
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_i64.mlir
index 5a1dfcb..7ef872f 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/assignment_ops_i64.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @select_i64
+// CHECK-LABEL: @my_module_select_i64
 vm.module @my_module {
   vm.func @select_i64(%arg0 : i32, %arg1 : i64, %arg2 : i64) -> i64 {
-    // CHECK: %0 = emitc.call "vm_select_i64"(%arg0, %arg1, %arg2) : (i32, i64, i64) -> i64
+    // CHECK: %0 = emitc.call "vm_select_i64"(%arg3, %arg4, %arg5) : (i32, i64, i64) -> i64
     %0 = vm.select.i64 %arg0, %arg1, %arg2 : i64
     vm.return %0 : i64
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops.mlir
index 42c72a3..ae98829 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops.mlir
@@ -1,11 +1,9 @@
-// Tests printing and parsing of comparison ops.
-
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_eq_i32
+  // CHECK-LABEL: @module_cmp_eq_i32
   vm.func @cmp_eq_i32(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.cmp.eq.i32 %arg0, %arg1 : i32
     vm.return
   }
@@ -14,9 +12,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_ne_i32
+  // CHECK-LABEL: @module_cmp_ne_i32
   vm.func @cmp_ne_i32(%arg0 : i32, %arg1 : i32) {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.cmp.ne.i32 %arg0, %arg1 : i32
     vm.return
   }
@@ -25,9 +23,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_i32_s
+  // CHECK-LABEL: @module_cmp_lt_i32_s
   vm.func @cmp_lt_i32_s(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i32s"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i32s"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.cmp.lt.i32.s %arg0, %arg1 : i32
     vm.return
   }
@@ -36,9 +34,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_i32_u
+  // CHECK-LABEL: @module_cmp_lt_i32_u
   vm.func @cmp_lt_i32_u(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i32u"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i32u"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.cmp.lt.i32.u %arg0, %arg1 : i32
     vm.return
   }
@@ -47,9 +45,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_nz_i32
+  // CHECK-LABEL: @module_cmp_nz_i32
   vm.func @cmp_nz_i32(%arg0 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nz_i32"(%arg0) : (i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nz_i32"(%arg3) : (i32) -> i32
     %0 = vm.cmp.nz.i32 %arg0 : i32
     vm.return
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_f32.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_f32.mlir
index 002060c..dba5173 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_f32.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_f32.mlir
@@ -1,11 +1,9 @@
-// Tests printing and parsing of comparison ops.
-
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_eq_f32o
+  // CHECK-LABEL: @module_cmp_eq_f32o
   vm.func @cmp_eq_f32o(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_f32o"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_f32o"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.eq.f32.o %arg0, %arg1 : f32
     vm.return
   }
@@ -14,9 +12,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_eq_f32u
+  // CHECK-LABEL: @module_cmp_eq_f32u
   vm.func @cmp_eq_f32u(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_f32u"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_f32u"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.eq.f32.u %arg0, %arg1 : f32
     vm.return
   }
@@ -25,9 +23,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_ne_f32o
+  // CHECK-LABEL: @module_cmp_ne_f32o
   vm.func @cmp_ne_f32o(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_f32o"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_f32o"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.ne.f32.o %arg0, %arg1 : f32
     vm.return
   }
@@ -36,9 +34,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_ne_f32u
+  // CHECK-LABEL: @module_cmp_ne_f32u
   vm.func @cmp_ne_f32u(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_f32u"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_f32u"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.ne.f32.u %arg0, %arg1 : f32
     vm.return
   }
@@ -47,9 +45,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_f32o
+  // CHECK-LABEL: @module_cmp_lt_f32o
   vm.func @cmp_lt_f32o(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_f32o"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_f32o"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.lt.f32.o %arg0, %arg1 : f32
     vm.return
   }
@@ -58,9 +56,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_f32u
+  // CHECK-LABEL: @module_cmp_lt_f32u
   vm.func @cmp_lt_f32u(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_f32u"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_f32u"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.lt.f32.u %arg0, %arg1 : f32
     vm.return
   }
@@ -69,9 +67,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lte_f32o
+  // CHECK-LABEL: @module_cmp_lte_f32o
   vm.func @cmp_lte_f32o(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lte_f32o"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lte_f32o"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.lte.f32.o %arg0, %arg1 : f32
     vm.return
   }
@@ -80,9 +78,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lte_f32u
+  // CHECK-LABEL: @module_cmp_lte_f32u
   vm.func @cmp_lte_f32u(%arg0 : f32, %arg1 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lte_f32u"(%arg0, %arg1) : (f32, f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lte_f32u"(%arg3, %arg4) : (f32, f32) -> i32
     %0 = vm.cmp.lte.f32.u %arg0, %arg1 : f32
     vm.return
   }
@@ -91,9 +89,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_nan_f32
+  // CHECK-LABEL: @module_cmp_nan_f32
   vm.func @cmp_nan_f32(%arg0 : f32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nan_f32"(%arg0) : (f32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nan_f32"(%arg3) : (f32) -> i32
     %0 = vm.cmp.nan.f32 %arg0 : f32
     vm.return
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_i64.mlir
index 2185da9..f60b8ae 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/comparison_ops_i64.mlir
@@ -1,11 +1,9 @@
-// Tests printing and parsing of comparison ops.
-
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_eq_i64
+  // CHECK-LABEL: @module_cmp_eq_i64
   vm.func @cmp_eq_i64(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_i64"(%arg0, %arg1) : (i64, i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_eq_i64"(%arg3, %arg4) : (i64, i64) -> i32
     %0 = vm.cmp.eq.i64 %arg0, %arg1 : i64
     vm.return
   }
@@ -14,9 +12,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_ne_i64
+  // CHECK-LABEL: @module_cmp_ne_i64
   vm.func @cmp_ne_i64(%arg0 : i64, %arg1 : i64) {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_i64"(%arg0, %arg1) : (i64, i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_ne_i64"(%arg3, %arg4) : (i64, i64) -> i32
     %0 = vm.cmp.ne.i64 %arg0, %arg1 : i64
     vm.return
   }
@@ -25,9 +23,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_i64_s
+  // CHECK-LABEL: @module_cmp_lt_i64_s
   vm.func @cmp_lt_i64_s(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i64s"(%arg0, %arg1) : (i64, i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i64s"(%arg3, %arg4) : (i64, i64) -> i32
     %0 = vm.cmp.lt.i64.s %arg0, %arg1 : i64
     vm.return
   }
@@ -36,9 +34,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_lt_i64_u
+  // CHECK-LABEL: @module_cmp_lt_i64_u
   vm.func @cmp_lt_i64_u(%arg0 : i64, %arg1 : i64) -> i64 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i64u"(%arg0, %arg1) : (i64, i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_lt_i64u"(%arg3, %arg4) : (i64, i64) -> i32
     %0 = vm.cmp.lt.i64.u %arg0, %arg1 : i64
     vm.return
   }
@@ -47,9 +45,9 @@
 // -----
 
 vm.module @module {
-  // CHECK-LABEL: vm.func @cmp_nz_i64
+  // CHECK-LABEL: @module_cmp_nz_i64
   vm.func @cmp_nz_i64(%arg0 : i64) -> i64 {
-    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nz_i64"(%arg0) : (i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_cmp_nz_i64"(%arg3) : (i64) -> i32
     %0 = vm.cmp.nz.i64 %arg0 : i64
     vm.return
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops.mlir
index 144a1bb..6408d06 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops.mlir
@@ -2,7 +2,7 @@
 
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_i32_zero
+  // CHECK-LABEL: @my_module_const_i32_zero
   vm.func @const_i32_zero() -> i32 {
     // CHECK: %[[ZERO:.+]] = "emitc.constant"() {value = 0 : i32} : () -> i32
     %zero = vm.const.i32.zero : i32
@@ -13,7 +13,7 @@
 // -----
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_i32
+  // CHECK-LABEL: @my_module_const_i32
   vm.func @const_i32() {
     // CHECK-NEXT: %0 = "emitc.constant"() {value = 0 : i32} : () -> i32
     %0 = vm.const.i32 0 : i32
@@ -28,10 +28,11 @@
 // -----
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_ref_zero
-  vm.func @const_ref_zero() -> !vm.ref<?> {
-    // CHECK: %[[REF:.+]] = emitc.call "VM_ARRAY_ELEMENT_ADDRESS"() {args = [#emitc.opaque<"local_refs">, 0 : i32]} : () -> !emitc.opaque<"iree_vm_ref_t*">
-    // CHECK: emitc.call "iree_vm_ref_release"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t*">) -> ()
+  // CHECK-LABEL: @my_module_const_ref_zero
+  vm.func @const_ref_zero() {
+    // CHECK: %[[REF:.+]] = "emitc.constant"() {ref_ordinal = 0 : index, value = #emitc.opaque<"{0}">} : () -> !emitc.opaque<"iree_vm_ref_t">
+    // CHECK-NEXT: %[[REFPTR:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.opaque<"iree_vm_ref_t*">
+    // CHECK-NEXT: emitc.call "iree_vm_ref_release"(%[[REFPTR]]) : (!emitc.opaque<"iree_vm_ref_t*">) -> ()
     %null = vm.const.ref.zero : !vm.ref<?>
     vm.return
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_f32.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_f32.mlir
index 01eb92c..eb5e30c 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_f32.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_f32.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_f32_zero
+  // CHECK-LABEL: @my_module_const_f32_zero
   vm.func @const_f32_zero() -> f32 {
     // CHECK: %[[ZERO:.+]] = "emitc.constant"() {value = 0.000000e+00 : f32} : () -> f32
     %zero = vm.const.f32.zero : f32
@@ -12,7 +12,7 @@
 // -----
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_f32
+  // CHECK-LABEL: @my_module_const_f32
   vm.func @const_f32() {
     // CHECK-NEXT: %0 = "emitc.constant"() {value = 5.000000e-01 : f32} : () -> f32
     %0 = vm.const.f32 0.5 : f32
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_i64.mlir
index 584fed5..ca70fcc 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/const_ops_i64.mlir
@@ -2,7 +2,7 @@
 
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_i64_zero
+  // CHECK-LABEL: @my_module_const_i64_zero
   vm.func @const_i64_zero() -> i64 {
     // CHECK: %[[ZERO:.+]] = "emitc.constant"() {value = 0 : i64} : () -> i64
     %zero = vm.const.i64.zero : i64
@@ -13,7 +13,7 @@
 // -----
 
 vm.module @my_module {
-  // CHECK-LABEL: vm.func @const_i64
+  // CHECK-LABEL: @my_module_const_i64
   vm.func @const_i64() {
     // CHECK-NEXT: %0 = "emitc.constant"() {value = 0 : i64} : () -> i64
     %0 = vm.const.i64 0 : i64
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops.mlir
index 00e1874..7d6cbab 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @trunc
+// CHECK-LABEL: @my_module_trunc
 vm.module @my_module {
   vm.func @trunc(%arg0 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_trunc_i32i8"(%arg0) : (i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_trunc_i32i8"(%arg3) : (i32) -> i32
     %0 = vm.trunc.i32.i8 %arg0 : i32 -> i32
     // CHECK-NEXT: %1 = emitc.call "vm_trunc_i32i16"(%0) : (i32) -> i32
     %1 = vm.trunc.i32.i16 %0 : i32 -> i32
@@ -13,10 +13,10 @@
 
 // -----
 
-// CHECK-LABEL: vm.func @ext
+// CHECK-LABEL: @my_module_ext
 vm.module @my_module {
   vm.func @ext(%arg0 : i32) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_ext_i8i32s"(%arg0) : (i32) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_ext_i8i32s"(%arg3) : (i32) -> i32
     %0 = vm.ext.i8.i32.s %arg0 : i32 -> i32
     // CHECK-NEXT: %1 = emitc.call "vm_ext_i8i32u"(%0) : (i32) -> i32
     %1 = vm.ext.i8.i32.u %0 : i32 -> i32
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_f32.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_f32.mlir
index 579fe29..dae8de4 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_f32.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_f32.mlir
@@ -1,11 +1,11 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @cast
+// CHECK-LABEL: @my_module_cast
 vm.module @my_module {
   vm.func @cast(%arg0 : i32) -> (i32, i32) {
-    // CHECK-NEXT: %0 = emitc.call "vm_cast_si32f32"(%arg0) : (i32) -> f32
+    // CHECK-NEXT: %0 = emitc.call "vm_cast_si32f32"(%arg3) : (i32) -> f32
     %0 = vm.cast.si32.f32 %arg0 : i32 -> f32
-    // CHECK-NEXT: %1 = emitc.call "vm_cast_ui32f32"(%arg0) : (i32) -> f32
+    // CHECK-NEXT: %1 = emitc.call "vm_cast_ui32f32"(%arg3) : (i32) -> f32
     %1 = vm.cast.ui32.f32 %arg0 : i32 -> f32
     // CHECK-NEXT: %2 = emitc.call "vm_cast_f32si32"(%0) : (f32) -> i32
     %2 = vm.cast.f32.si32 %0 : f32 -> i32
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_i64.mlir
index 924fdfe..6371d31 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/conversion_ops_i64.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: vm.func @trunc_i64
+// CHECK-LABEL: @my_module_trunc_i64
 vm.module @my_module {
   vm.func @trunc_i64(%arg0 : i64) -> i32 {
-    // CHECK-NEXT: %0 = emitc.call "vm_trunc_i64i32"(%arg0) : (i64) -> i32
+    // CHECK-NEXT: %0 = emitc.call "vm_trunc_i64i32"(%arg3) : (i64) -> i32
     %0 = vm.trunc.i64.i32 %arg0 : i64 -> i32
     vm.return %0 : i32
   }
@@ -11,12 +11,12 @@
 
 // -----
 
-// CHECK-LABEL: vm.func @ext_i64
+// CHECK-LABEL: @my_module_ext_i64
 vm.module @my_module {
   vm.func @ext_i64(%arg0 : i32) -> i64 {
-    // CHECK-NEXT: %0 = emitc.call "vm_ext_i32i64s"(%arg0) : (i32) -> i64
+    // CHECK-NEXT: %0 = emitc.call "vm_ext_i32i64s"(%arg3) : (i32) -> i64
     %0 = vm.ext.i32.i64.s %arg0 : i32 -> i64
-    // CHECK-NEXT: %1 = emitc.call "vm_ext_i32i64u"(%arg0) : (i32) -> i64
+    // CHECK-NEXT: %1 = emitc.call "vm_ext_i32i64u"(%arg3) : (i32) -> i64
     %1 = vm.ext.i32.i64.u %arg0 : i32 -> i64
     vm.return %1 : i64
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops.mlir
index ee6bd3f..ebde983 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: @shl_i32
+// CHECK-LABEL: @my_module_shl_i32
 vm.module @my_module {
   vm.func @shl_i32(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_shl_i32"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_shl_i32"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.shl.i32 %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -11,10 +11,10 @@
 
 // -----
 
-// CHECK-LABEL: @shr_i32_s
+// CHECK-LABEL: @my_module_shr_i32_s
 vm.module @my_module {
   vm.func @shr_i32_s(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_shr_i32s"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_shr_i32s"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.shr.i32.s %arg0, %arg1 : i32
     vm.return %0 : i32
   }
@@ -22,10 +22,10 @@
 
 // -----
 
-// CHECK-LABEL: @shr_i32_u
+// CHECK-LABEL: @my_module_shr_i32_u
 vm.module @my_module {
   vm.func @shr_i32_u(%arg0 : i32, %arg1 : i32) -> i32 {
-    // CHECK: %0 = emitc.call "vm_shr_i32u"(%arg0, %arg1) : (i32, i32) -> i32
+    // CHECK: %0 = emitc.call "vm_shr_i32u"(%arg3, %arg4) : (i32, i32) -> i32
     %0 = vm.shr.i32.u %arg0, %arg1 : i32
     vm.return %0 : i32
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops_i64.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops_i64.mlir
index 547d080..4a72b13 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops_i64.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/shift_ops_i64.mlir
@@ -1,9 +1,9 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
-// CHECK-LABEL: @shl_i64
+// CHECK-LABEL: @my_module_shl_i64
 vm.module @my_module {
   vm.func @shl_i64(%arg0 : i64, %arg1 : i32) -> i64 {
-    // CHECK: %0 = emitc.call "vm_shl_i64"(%arg0, %arg1) : (i64, i32) -> i64
+    // CHECK: %0 = emitc.call "vm_shl_i64"(%arg3, %arg4) : (i64, i32) -> i64
     %0 = vm.shl.i64 %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -11,10 +11,10 @@
 
 // -----
 
-// CHECK-LABEL: @shr_i64_s
+// CHECK-LABEL: @my_module_shr_i64_s
 vm.module @my_module {
   vm.func @shr_i64_s(%arg0 : i64, %arg1 : i32) -> i64 {
-    // CHECK: %0 = emitc.call "vm_shr_i64s"(%arg0, %arg1) : (i64, i32) -> i64
+    // CHECK: %0 = emitc.call "vm_shr_i64s"(%arg3, %arg4) : (i64, i32) -> i64
     %0 = vm.shr.i64.s %arg0, %arg1 : i64
     vm.return %0 : i64
   }
@@ -22,10 +22,10 @@
 
 // -----
 
-// CHECK-LABEL: @shr_i64_u
+// CHECK-LABEL: @my_module_shr_i64_u
 vm.module @my_module {
   vm.func @shr_i64_u(%arg0 : i64, %arg1 : i32) -> i64 {
-    // CHECK: %0 = emitc.call "vm_shr_i64u"(%arg0, %arg1) : (i64, i32) -> i64
+    // CHECK: %0 = emitc.call "vm_shr_i64u"(%arg3, %arg4) : (i64, i32) -> i64
     %0 = vm.shr.i64.u %arg0, %arg1 : i64
     vm.return %0 : i64
   }
diff --git a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
index 31f172f..fb63cde 100644
--- a/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
+++ b/iree/compiler/Dialect/VM/Conversion/VMToEmitC/test/type_conversion.mlir
@@ -1,19 +1,21 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='vm.module(iree-vm-ordinal-allocation),vm.module(iree-convert-vm-to-emitc)' %s | IreeFileCheck %s
 
 vm.module @my_module {
-  // CHECK-LABEL: @list_alloc
+  // CHECK-LABEL: @my_module_list_alloc
   vm.func @list_alloc(%arg0: i32) {
+    // CHECK: %[[REF:.+]] = "emitc.constant"() {ref_ordinal = 0 : index, value = #emitc.opaque<"{0}">} : () -> !emitc.opaque<"iree_vm_ref_t"> 
+    // CHECK: %[[LISTREF:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.opaque<"iree_vm_ref_t*"> 
     %list = vm.list.alloc %arg0 : (i32) -> !vm.list<i32>
-    // CHECK: %[[LIST:.+]] = emitc.call "VM_ARRAY_ELEMENT_ADDRESS"() {args = [#emitc.opaque<"local_refs">, 0 : i32]} : () -> !emitc.opaque<"iree_vm_ref_t*">
     %list_dno = iree.do_not_optimize(%list) : !vm.list<i32>
-    // CHECK: iree.do_not_optimize(%[[LIST]]) : !emitc.opaque<"iree_vm_ref_t*">
+    // CHECK: iree.do_not_optimize(%[[LISTREF]]) : !emitc.opaque<"iree_vm_ref_t*">
     vm.return
   }
 
-  // CHECK-LABEL: @list_size
+  // CHECK-LABEL: @my_module_list_size
   vm.func @list_size(%arg0: i32) {
     %list = vm.list.alloc %arg0 : (i32) -> !vm.list<i32>
-    // CHECK: %[[LIST:.+]] = emitc.call "VM_ARRAY_ELEMENT_ADDRESS"() {args = [#emitc.opaque<"local_refs">, 0 : i32]} : () -> !emitc.opaque<"iree_vm_ref_t*">
+    // CHECK: %[[REF:.+]] = "emitc.constant"() {ref_ordinal = 0 : index, value = #emitc.opaque<"{0}">} : () -> !emitc.opaque<"iree_vm_ref_t"> 
+    // CHECK: %[[LISTREF:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.opaque<"iree_vm_ref_t*"> 
     %size = vm.list.size %list : (!vm.list<i32>) -> i32
     // CHECK: %[[SIZE:.+]] = emitc.call "iree_vm_list_size"(%{{.+}})
     %size_dno = iree.do_not_optimize(%size) : i32
@@ -26,13 +28,14 @@
 
 vm.module @my_module {
   vm.rodata private @byte_buffer dense<[1, 2, 3]> : tensor<3xi32>
-  // CHECK-LABEL: @ref
+  // CHECK-LABEL: @my_module_ref
   vm.export @ref
   vm.func @ref(%arg0: i32) {
+    // CHECK: %[[REF:.+]] = "emitc.constant"() {ref_ordinal = 0 : index, value = #emitc.opaque<"{0}">} : () -> !emitc.opaque<"iree_vm_ref_t"> 
+    // CHECK: %[[BUFFERREF:.+]] = emitc.apply "&"(%[[REF]]) : (!emitc.opaque<"iree_vm_ref_t">) -> !emitc.opaque<"iree_vm_ref_t*"> 
     %buffer = vm.const.ref.rodata @byte_buffer : !vm.buffer
-    // CHECK: %[[BUFFER:.+]] = emitc.call "VM_ARRAY_ELEMENT_ADDRESS"() {args = [#emitc.opaque<"local_refs">, 0 : i32]} : () -> !emitc.opaque<"iree_vm_ref_t*">
     %buffer_dno = iree.do_not_optimize(%buffer) : !vm.buffer
-    // CHECK: iree.do_not_optimize(%[[BUFFER]]) : !emitc.opaque<"iree_vm_ref_t*">
+    // CHECK: iree.do_not_optimize(%[[BUFFERREF]]) : !emitc.opaque<"iree_vm_ref_t*">
     vm.return
   }
 }
diff --git a/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt b/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt
index 2678ecc..6884098 100644
--- a/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt
+++ b/iree/compiler/Dialect/VM/Target/C/CMakeLists.txt
@@ -27,7 +27,6 @@
       iree::compiler::Dialect::VM::Analysis
       iree::compiler::Dialect::VM::IR
       iree::compiler::Dialect::VM::Conversion::VMToEmitC
-      iree::compiler::Dialect::VM::Utils::CallingConvention
       iree::compiler::Dialect::VM::Utils::ConstantEncoding
     PUBLIC
   )
diff --git a/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
index 658d756..f5d3deb 100644
--- a/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
+++ b/iree/compiler/Dialect/VM/Target/C/CModuleTarget.cpp
@@ -13,7 +13,6 @@
 #include "iree/compiler/Dialect/VM/Conversion/VMToEmitC/ConvertVMToEmitC.h"
 #include "iree/compiler/Dialect/VM/Conversion/VMToEmitC/DropExcludedExports.h"
 #include "iree/compiler/Dialect/VM/Transforms/Passes.h"
-#include "iree/compiler/Dialect/VM/Utils/CallingConvention.h"
 #include "iree/compiler/Dialect/VM/Utils/ConstantEncoding.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "mlir/Pass/PassManager.h"
@@ -24,15 +23,6 @@
 namespace IREE {
 namespace VM {
 
-static std::string buildFunctionName(IREE::VM::ModuleOp &moduleOp,
-                                     IREE::VM::FuncOp &funcOp,
-                                     bool implSuffix) {
-  std::string functionName =
-      std::string(moduleOp.getName()) + "_" + std::string(funcOp.getName());
-
-  return implSuffix ? functionName + "_impl" : functionName;
-}
-
 static void printModuleComment(IREE::VM::ModuleOp &moduleOp,
                                llvm::raw_ostream &output) {
   output << "//" << std::string(77, '=') << "\n"
@@ -96,7 +86,7 @@
   llvm::raw_ostream &output = emitter.ostream();
   std::string moduleName = moduleOp.getName().str();
 
-  output << "struct " << moduleName << "_t;\n";
+  output << "struct " << moduleName << "_t {};\n";
   output << "struct " << moduleName << "_state_t {\n";
 
   output << "iree_allocator_t allocator;\n";
@@ -119,9 +109,10 @@
   return success();
 }
 
-static LogicalResult printShim(IREE::VM::FuncOp &funcOp,
+static LogicalResult printShim(mlir::FuncOp &funcOp,
                                llvm::raw_ostream &output) {
-  auto callingConvention = makeCallingConventionString(funcOp);
+  StringAttr callingConvention =
+      funcOp.getOperation()->getAttr("calling_convention").cast<StringAttr>();
   if (!callingConvention) {
     return funcOp.emitError("Couldn't create calling convention string");
   }
@@ -129,36 +120,6 @@
   return success();
 }
 
-static LogicalResult printFuncOpArguments(IREE::VM::FuncOp &funcOp,
-                                          mlir::emitc::CppEmitter &emitter) {
-  return mlir::emitc::interleaveCommaWithError(
-      funcOp.getArguments(), emitter.ostream(), [&](auto arg) -> LogicalResult {
-        if (failed(emitter.emitType(*funcOp.getOperation(), arg.getType()))) {
-          return failure();
-        }
-        emitter.ostream() << " " << emitter.getOrCreateName(arg);
-        return success();
-      });
-}
-
-/// Function results get propagated through pointer arguments
-static LogicalResult printFuncOpResults(
-    IREE::VM::FuncOp &funcOp, mlir::emitc::CppEmitter &emitter,
-    SmallVector<std::string, 4> &resultNames) {
-  return mlir::emitc::interleaveCommaWithError(
-      llvm::zip(funcOp.getType().getResults(), resultNames), emitter.ostream(),
-      [&](std::tuple<Type, std::string> tuple) -> LogicalResult {
-        Type type = std::get<0>(tuple);
-        std::string resultName = std::get<1>(tuple);
-
-        if (failed(emitter.emitType(*funcOp.getOperation(), type))) {
-          return failure();
-        }
-        emitter.ostream() << " *" << resultName;
-        return success();
-      });
-}
-
 static LogicalResult initializeState(IREE::VM::ModuleOp moduleOp,
                                      mlir::emitc::CppEmitter &emitter) {
   llvm::raw_ostream &output = emitter.ostream();
@@ -178,328 +139,14 @@
   return success();
 }
 
-static LogicalResult translateOp(IREE::VM::BranchOp branchOp,
-                                 mlir::emitc::CppEmitter &emitter) {
-  auto &output = emitter.ostream();
-  Block &successor = *branchOp.getSuccessor();
-
-  for (auto pair :
-       llvm::zip(branchOp.getOperands(), successor.getArguments())) {
-    auto &operand = std::get<0>(pair);
-    auto &argument = std::get<1>(pair);
-    output << emitter.getOrCreateName(argument) << " = "
-           << emitter.getOrCreateName(operand) << ";\n";
-  }
-
-  output << "goto ";
-  if (!(emitter.hasBlockLabel(successor))) {
-    return branchOp.emitOpError() << "Unable to find label for successor block";
-  }
-  output << emitter.getOrCreateName(successor) << ";\n";
-  return success();
-}
-
-static LogicalResult translateOp(IREE::VM::CondBranchOp condBranchOp,
-                                 mlir::emitc::CppEmitter &emitter) {
-  llvm::raw_ostream &output = emitter.ostream();
-
-  Block &trueSuccessor = *condBranchOp.getTrueDest();
-  Block &falseSuccessor = *condBranchOp.getFalseDest();
-
-  output << "if (" << emitter.getOrCreateName(condBranchOp.getCondition())
-         << ") {\n";
-
-  // If condition is true.
-  for (auto pair : llvm::zip(condBranchOp.getTrueOperands(),
-                             trueSuccessor.getArguments())) {
-    auto &operand = std::get<0>(pair);
-    auto &argument = std::get<1>(pair);
-    output << emitter.getOrCreateName(argument) << " = "
-           << emitter.getOrCreateName(operand) << ";\n";
-  }
-
-  output << "goto ";
-  if (!(emitter.hasBlockLabel(trueSuccessor))) {
-    return condBranchOp.emitOpError()
-           << "Unable to find label for successor block";
-  }
-  output << emitter.getOrCreateName(trueSuccessor) << ";\n";
-  output << "} else {\n";
-  // If condition is false.
-  for (auto pair : llvm::zip(condBranchOp.getFalseOperands(),
-                             falseSuccessor.getArguments())) {
-    auto &operand = std::get<0>(pair);
-    auto &argument = std::get<1>(pair);
-    output << emitter.getOrCreateName(argument) << " = "
-           << emitter.getOrCreateName(operand) << ";\n";
-  }
-
-  output << "goto ";
-  if (!(emitter.hasBlockLabel(falseSuccessor))) {
-    return condBranchOp.emitOpError()
-           << "Unable to find label for successor block";
-  }
-  output << emitter.getOrCreateName(falseSuccessor) << ";\n";
-  output << "}\n";
-  return success();
-}
-
-static LogicalResult translateFailOp(IREE::VM::FailOp failOp,
-                                     mlir::emitc::CppEmitter &emitter,
-                                     bool hasRefs) {
-  llvm::raw_ostream &output = emitter.ostream();
-
-  auto status = failOp.status();
-
-  if (hasRefs) {
-    output << "VM_REF_ARRAY_RELEASE(local_refs);\n";
-  }
-
-  output << "return vm_fail_or_ok(" << emitter.getOrCreateName(status)
-         << ", iree_make_cstring_view(\"" << failOp.message() << "\"));\n";
-  return success();
-}
-
-static LogicalResult translateReturnOpToC(
-    IREE::VM::ReturnOp returnOp, mlir::emitc::CppEmitter &emitter,
-    SmallVector<std::string, 4> resultNames, bool hasRefs) {
-  llvm::raw_ostream &output = emitter.ostream();
-
-  for (std::tuple<Value, std::string> tuple :
-       llvm::zip(returnOp.getOperands(), resultNames)) {
-    Value operand = std::get<0>(tuple);
-    std::string resultName = std::get<1>(tuple);
-    output << "*" << resultName << " = " << emitter.getOrCreateName(operand)
-           << ";\n";
-  }
-
-  if (hasRefs) {
-    output << "VM_REF_ARRAY_RELEASE(local_refs);\n";
-  }
-
-  output << "return iree_ok_status();\n";
-
-  return success();
-}
-
-static LogicalResult translateOpToC(Operation &op,
-                                    mlir::emitc::CppEmitter &emitter,
-                                    SmallVector<std::string, 4> resultNames,
-                                    bool hasRefs) {
-  LogicalResult status =
-      llvm::TypeSwitch<Operation *, LogicalResult>(&op)
-          .Case<IREE::VM::BranchOp, IREE::VM::CondBranchOp>(
-              [&](auto op) { return translateOp(op, emitter); })
-          .Case<IREE::VM::FailOp>(
-              [&](auto op) { return translateFailOp(op, emitter, hasRefs); })
-          .Case<IREE::VM::ReturnOp>([&](auto op) {
-            return translateReturnOpToC(op, emitter, resultNames, hasRefs);
-          })
-          // Fall back to generic emitc printer
-          .Default([&](Operation *) {
-            return emitter.emitOperation(op, /*trailingSemicolon=*/true);
-          });
-
-  if (failed(status)) return failure();
-
-  return success();
-}
-
-static LogicalResult translateFunctionToC(IREE::VM::ModuleOp &moduleOp,
-                                          IREE::VM::FuncOp &funcOp,
-                                          mlir::emitc::CppEmitter &emitter,
-                                          bool declareOnly) {
-  std::string moduleName = moduleOp.getName().str();
-  emitc::CppEmitter::Scope scope(emitter);
-  llvm::raw_ostream &output = emitter.ostream();
-
-  // this function later gets wrapped with argument marshalling code
-  std::string functionName =
-      buildFunctionName(moduleOp, funcOp, /*implSuffix=*/true);
-
-  output << "static iree_status_t " << functionName << "(";
-
-  if (failed(printFuncOpArguments(funcOp, emitter))) {
-    return failure();
-  }
-
-  if (funcOp.getNumResults() > 0 && funcOp.getNumArguments() > 0) {
-    output << ", ";
-  }
-
-  SmallVector<std::string, 4> resultNames;
-  for (unsigned int idx = 0; idx < funcOp.getNumResults(); idx++) {
-    std::string resultName = "out" + std::to_string(idx);
-    resultNames.push_back(resultName);
-  }
-
-  if (failed(printFuncOpResults(funcOp, emitter, resultNames))) {
-    return failure();
-  }
-
-  if (funcOp.getNumArguments() + funcOp.getNumResults() > 0) {
-    output << ", ";
-  }
-
-  output << "iree_vm_stack_t* stack, ";
-
-  // TODO(simon-camp): We can't represent structs in emitc (yet maybe), so the
-  // struct argument name here must not be changed.
-  output << moduleName << "_state_t* state)";
-
-  if (declareOnly) {
-    output << ";\n";
-    return success();
-  }
-  output << " {\n";
-
-  // We forward declare all result variables except for the ones with RefType.
-  output << "// VARIABLE DECLARATIONS\n";
-  output << "// RESULTS\n";
-  for (auto &op : funcOp.getOps()) {
-    for (auto result : op.getResults()) {
-      if (result.getType().isa<IREE::VM::RefType>()) {
-        continue;
-      }
-      if (failed(emitter.emitVariableDeclaration(result,
-                                                 /*trailingSemicolon=*/true))) {
-        return op.emitError() << "Unable to declare result variable for op";
-      }
-    }
-  }
-  output << "// BASIC BLOCK ARGUMENTS\n";
-
-  auto &blocks = funcOp.getBlocks();
-  // Create label names for basic blocks.
-  for (auto &block : blocks) {
-    emitter.getOrCreateName(block);
-  }
-
-  // Emit variables for basic block arguments (omitting the first).
-  for (auto it = std::next(blocks.begin()); it != blocks.end(); ++it) {
-    Block &block = *it;
-    for (auto &arg : block.getArguments()) {
-      if (emitter.hasValueInScope(arg)) {
-        // This shouldn't happen
-        return failure();
-      }
-      if (failed(emitter.emitType(*funcOp.getOperation(), arg.getType()))) {
-        return failure();
-      }
-      output << " " << emitter.getOrCreateName(arg) << ";\n";
-    }
-  }
-
-  output << "// END VARIABLE DECLARATIONS\n";
-
-  // We reuse the register allocation pass and emit an array for all Values with
-  // ref type instead of generating one variable per Value. This makes the
-  // deallocation process easier for us.
-
-  RegisterAllocation registerAllocation;
-  if (failed(registerAllocation.recalculate(funcOp))) {
-    return funcOp.emitOpError() << "unable to perform register allocation";
-  }
-
-  // TODO(simon-camp): We sometimes get a to high number of refs used. This may
-  // be because the IR is in a mixed state of VM and EmitC dialects and the
-  // register allocation pass doesn't handle the 'emitc.opaque' type correctly.
-  // We could either
-  //  - annotate the function with the correct number of refs in the
-  //    conversion or
-  //  - define the array in the conversion (which would need to be
-  //    done through a macro at the moment because array types are not handled
-  //    by EmitC).
-  const size_t numRefs = registerAllocation.getMaxRefRegisterOrdinal() + 1;
-  const bool hasRefs = numRefs > 0;
-
-  if (hasRefs) {
-    auto ref_initializers = SmallVector<StringRef, 4>{numRefs, "{0}"};
-    output << "iree_vm_ref_t local_refs[" << numRefs << "] = {"
-           << llvm::join(ref_initializers, ", ") << "};\n";
-  }
-
-  for (auto &block : blocks) {
-    // Only print a label if there is more than one block.
-    if (blocks.size() > 1) {
-      if (failed(emitter.emitLabel(block))) {
-        return funcOp.emitOpError() << "Unable to print label for basic block";
-      }
-    }
-    for (Operation &op : block.getOperations()) {
-      if (failed(
-              translateOpToC(op, emitter, resultNames, /*hasRefs=*/hasRefs))) {
-        return failure();
-      }
-    }
-  }
-
-  output << "}\n";
-
-  return success();
-}
-
 static LogicalResult buildModuleDescriptors(IREE::VM::ModuleOp &moduleOp,
                                             mlir::emitc::CppEmitter &emitter) {
   SymbolTable symbolTable(moduleOp);
   std::string moduleName = moduleOp.getName().str();
   llvm::raw_ostream &output = emitter.ostream();
 
-  // function wrapper
-  for (auto funcOp : moduleOp.getOps<IREE::VM::FuncOp>()) {
-    output << "static iree_status_t "
-           << buildFunctionName(moduleOp, funcOp,
-                                /*implSufffix=*/false)
-           << "("
-           << "iree_vm_stack_t* stack, " << moduleName << "_t* module, "
-           << moduleName << "_state_t* state";
-
-    if (funcOp.getNumArguments() > 0) {
-      output << ", ";
-    }
-
-    if (failed(printFuncOpArguments(funcOp, emitter))) {
-      return failure();
-    }
-
-    if (funcOp.getNumResults() > 0) {
-      output << ", ";
-    }
-
-    SmallVector<std::string, 4> resultNames;
-    for (unsigned int idx = 0; idx < funcOp.getNumResults(); idx++) {
-      std::string resultName = "out" + std::to_string(idx);
-      resultNames.push_back(resultName);
-    }
-
-    if (failed(printFuncOpResults(funcOp, emitter, resultNames))) {
-      return failure();
-    }
-    output << ") {\n"
-           << "return "
-           << buildFunctionName(moduleOp, funcOp,
-                                /*implSufffix=*/true)
-           << "(";
-
-    SmallVector<std::string, 4> argNames;
-    for (Value &argument : funcOp.getArguments()) {
-      std::string argName = emitter.getOrCreateName(argument).str();
-      argNames.push_back(argName);
-    }
-
-    for (std::string &resultName : resultNames) {
-      argNames.push_back(resultName);
-    }
-
-    argNames.push_back("stack");
-    argNames.push_back("state");
-
-    output << llvm::join(argNames, ", ");
-    output << ");\n}\n";
-  }
-
-  auto printCStringView = [](std::string s) -> std::string {
-    return "iree_make_cstring_view(\"" + s + "\")";
+  auto printCStringView = [](StringRef s) -> std::string {
+    return ("iree_make_cstring_view(\"" + s + "\")").str();
   };
 
   // exports
@@ -515,18 +162,20 @@
   });
 
   for (auto exportOp : exportOps) {
-    auto funcOp = symbolTable.lookup<IREE::VM::FuncOp>(exportOp.function_ref());
+    StringRef funcName = exportOp.function_ref();
+    auto funcOp = symbolTable.lookup<mlir::FuncOp>(funcName);
     if (!funcOp) {
       return exportOp.emitError("Couldn't find referenced FuncOp");
     }
-    auto callingConvention = makeCallingConventionString(funcOp);
+    StringAttr callingConvention =
+        funcOp.getOperation()->getAttr("calling_convention").cast<StringAttr>();
     if (!callingConvention) {
       return exportOp.emitError(
           "Couldn't create calling convention string for referenced FuncOp");
     }
 
     // TODO(simon-camp): support function-level reflection attributes
-    output << "{" << printCStringView(exportOp.export_name().str()) << ", "
+    output << "{" << printCStringView(exportOp.export_name()) << ", "
            << printCStringView(callingConvention.getValue()) << ", 0, NULL},\n";
   }
   output << "};\n";
@@ -545,7 +194,7 @@
   });
 
   for (auto importOp : importOps) {
-    output << "{" << printCStringView(importOp.getName().str()) << "},\n";
+    output << "{" << printCStringView(importOp.getName()) << "},\n";
   }
   output << "};\n";
   output << "\n";
@@ -558,7 +207,8 @@
   // We only add exported functions to the table, as calls to internal functions
   // are directly mapped to C function calls of the generated implementation.
   for (auto exportOp : exportOps) {
-    auto funcOp = symbolTable.lookup<IREE::VM::FuncOp>(exportOp.function_ref());
+    StringRef funcName = exportOp.function_ref();
+    auto funcOp = symbolTable.lookup<mlir::FuncOp>(funcName);
     if (!funcOp) {
       return exportOp.emitError("Couldn't find referenced FuncOp");
     }
@@ -569,9 +219,7 @@
       return funcOp.emitError("Couldn't create calling convention string");
     }
     output << ", "
-           << "(iree_vm_native_function_target_t)"
-           << buildFunctionName(moduleOp, funcOp, /*implSufffix=*/false)
-           << "},\n";
+           << "(iree_vm_native_function_target_t)" << funcName << "},\n";
   }
   output << "};\n";
   output << "\n";
@@ -753,6 +401,7 @@
 
   printInclude("iree/vm/api.h");
   printInclude("iree/vm/ops.h");
+  printInclude("iree/vm/ops_emitc.h");
   printInclude("iree/vm/shims_emitc.h");
   printInclude("iree/vm/value.h");
   output << "\n";
@@ -772,28 +421,39 @@
     return failure();
   }
 
+  // Forward-declare every function first, then emit the definitions.
   output << "// DECLARE FUNCTIONS\n";
 
-  // forward declare functions
-  for (auto funcOp : moduleOp.getOps<IREE::VM::FuncOp>()) {
-    if (failed(translateFunctionToC(moduleOp, funcOp, emitter,
-                                    /*declareOnly=*/true))) {
-      return failure();
-    }
+  for (auto funcOp : moduleOp.getOps<mlir::FuncOp>()) {
+    Operation *op = funcOp.getOperation();
+    if (op->hasAttr("emitc.static")) output << "static ";
 
-    output << "\n";
+    if (failed(emitter.emitTypes(*funcOp.getOperation(),
+                                 funcOp.getType().getResults())))
+      return failure();
+    output << " " << funcOp.getName();
+
+    output << "(";
+
+    bool error = false;
+    llvm::interleaveComma(
+        funcOp.getArguments(), output, [&](BlockArgument arg) {
+          if (failed(emitter.emitType(*funcOp.getOperation(), arg.getType())))
+            error = true;
+        });
+    if (error) return failure();
+    output << ");\n";
   }
 
   output << "// DEFINE FUNCTIONS\n";
 
-  // translate functions
-  for (auto funcOp : moduleOp.getOps<IREE::VM::FuncOp>()) {
-    if (failed(translateFunctionToC(moduleOp, funcOp, emitter,
-                                    /*declareOnly=*/false))) {
+  for (auto funcOp : moduleOp.getOps<mlir::FuncOp>()) {
+    Operation *op = funcOp.getOperation();
+    if (op->hasAttr("emitc.static")) output << "static ";
+    if (failed(emitter.emitOperation(*funcOp.getOperation(),
+                                     /*trailingSemicolon=*/
+                                     false)))
       return failure();
-    }
-
-    output << "\n";
   }
 
   printSeparatingComment(output);
diff --git a/iree/compiler/Dialect/VM/Target/C/test/add.mlir b/iree/compiler/Dialect/VM/Target/C/test/add.mlir
index 84a28e6..7fe35b5 100644
--- a/iree/compiler/Dialect/VM/Target/C/test/add.mlir
+++ b/iree/compiler/Dialect/VM/Target/C/test/add.mlir
@@ -2,21 +2,19 @@
 
 // CHECK: #include "iree/vm/ops.h"
 vm.module @add_module {
-  // CHECK: static iree_status_t add_module_add_1_impl(int32_t v1, int32_t v2, int32_t *out0, int32_t *out1, iree_vm_stack_t* stack, add_module_state_t* state) {
+  // CHECK: static iree_status_t add_module_add_1(iree_vm_stack_t* v1, add_module_t* v2, add_module_state_t* v3, int32_t v4, int32_t v5, int32_t* v6, int32_t* v7) {
   vm.func @add_1(%arg0 : i32, %arg1 : i32) -> (i32, i32) {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: int32_t v3;
-    // CHECK-NEXT: int32_t v4;
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: v3 = vm_add_i32(v1, v2);
+    // CHECK-NEXT: int32_t v8;
+    // CHECK-NEXT: int32_t v9;
+    // CHECK-NEXT: iree_status_t v10;
+    // CHECK-NEXT: v8 = vm_add_i32(v4, v5);
     %0 = vm.add.i32 %arg0, %arg1 : i32
-    // CHECK-NEXT: v4 = vm_add_i32(v3, v3);
+    // CHECK-NEXT: v9 = vm_add_i32(v8, v8);
     %1 = vm.add.i32 %0, %0 : i32
-    // CHECK-NEXT: *out0 = v3;
-    // CHECK-NEXT: *out1 = v4;
-    // CHECK-NEXT: return iree_ok_status();
+    // CHECK-NEXT: EMITC_DEREF_ASSIGN(v6, v8);
+    // CHECK-NEXT: EMITC_DEREF_ASSIGN(v7, v9);
+    // CHECK-NEXT: v10 = iree_ok_status();
+    // CHECK-NEXT: return v10;
     vm.return %0, %1 : i32, i32
   }
 }
diff --git a/iree/compiler/Dialect/VM/Target/C/test/calling_convention.mlir b/iree/compiler/Dialect/VM/Target/C/test/calling_convention.mlir
index 3bc1f95..66c9ebe 100644
--- a/iree/compiler/Dialect/VM/Target/C/test/calling_convention.mlir
+++ b/iree/compiler/Dialect/VM/Target/C/test/calling_convention.mlir
@@ -2,51 +2,43 @@
 
 // CHECK: #include "iree/vm/ops.h"
 vm.module @calling_convention_test {
-  // CHECK: static iree_status_t calling_convention_test_no_in_no_return_impl(iree_vm_stack_t* stack, calling_convention_test_state_t* state) {
+  // CHECK: static iree_status_t calling_convention_test_no_in_no_return(iree_vm_stack_t* v1, calling_convention_test_t* v2, calling_convention_test_state_t* v3) {
   vm.func @no_in_no_return() -> () {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: return iree_ok_status();
+    // CHECK-NEXT: iree_status_t v4;
+    // CHECK-NEXT: v4 = iree_ok_status();
+    // CHECK-NEXT: return v4;
     vm.return
   }
 
-  // CHECK: static iree_status_t calling_convention_test_i32_in_no_return_impl(int32_t v1, iree_vm_stack_t* stack, calling_convention_test_state_t* state) {
+  // CHECK: static iree_status_t calling_convention_test_i32_in_no_return(iree_vm_stack_t* v1, calling_convention_test_t* v2, calling_convention_test_state_t* v3, int32_t v4) {
   vm.func @i32_in_no_return(%arg0 : i32) -> () {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: return iree_ok_status();
+    // CHECK-NEXT: iree_status_t v5;
+    // CHECK-NEXT: v5 = iree_ok_status();
+    // CHECK-NEXT: return v5;
     vm.return
   }
 
-  // CHECK: static iree_status_t calling_convention_test_no_in_i32_return_impl(int32_t *out0, iree_vm_stack_t* stack, calling_convention_test_state_t* state) {
+  // CHECK: static iree_status_t calling_convention_test_no_in_i32_return(iree_vm_stack_t* v1, calling_convention_test_t* v2, calling_convention_test_state_t* v3, int32_t* v4) {
   vm.func @no_in_i32_return() -> (i32) {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: int32_t v1;
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: v1 = 32;
+    // CHECK-NEXT: int32_t v5;
+    // CHECK-NEXT: iree_status_t v6;
+    // CHECK-NEXT: v5 = 32;
     %0 = vm.const.i32 32 : i32
-    // CHECK-NEXT: *out0 = v1;
-    // CHECK-NEXT: return iree_ok_status();
+    // CHECK-NEXT: EMITC_DEREF_ASSIGN(v4, v5);
+    // CHECK-NEXT: v6 = iree_ok_status();
+    // CHECK-NEXT: return v6;
     vm.return %0 : i32
   }
 
-  // CHECK: static iree_status_t calling_convention_test_i32_in_i32_return_impl(int32_t v1, int32_t *out0, iree_vm_stack_t* stack, calling_convention_test_state_t* state) {
+  // CHECK: static iree_status_t calling_convention_test_i32_in_i32_return(iree_vm_stack_t* v1, calling_convention_test_t* v2, calling_convention_test_state_t* v3, int32_t v4, int32_t* v5) {
   vm.func @i32_in_i32_return(%arg0 : i32) -> (i32) {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: int32_t v2;
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: v2 = 32;
+    // CHECK-NEXT: int32_t v6;
+    // CHECK-NEXT: iree_status_t v7;
+    // CHECK-NEXT: v6 = 32;
     %0 = vm.const.i32 32 : i32
-    // CHECK-NEXT: *out0 = v2;
-    // CHECK-NEXT: return iree_ok_status();
+    // CHECK-NEXT: EMITC_DEREF_ASSIGN(v5, v6);
+    // CHECK-NEXT: v7 = iree_ok_status();
+    // CHECK-NEXT: return v7;
     vm.return %0 : i32
   }
 }
diff --git a/iree/compiler/Dialect/VM/Target/C/test/control_flow.mlir b/iree/compiler/Dialect/VM/Target/C/test/control_flow.mlir
index 88d4efd..6a6fcf7 100644
--- a/iree/compiler/Dialect/VM/Target/C/test/control_flow.mlir
+++ b/iree/compiler/Dialect/VM/Target/C/test/control_flow.mlir
@@ -15,18 +15,19 @@
     vm.return %0 : i32
   }
 }
-// CHECK: static iree_status_t control_flow_module_control_flow_test_impl(int32_t [[A:[^ ]*]], int32_t [[COND:[^ ]*]], int32_t *[[RESULT:[^ ]*]], iree_vm_stack_t* stack, control_flow_module_state_t* [[STATE:[^ ]*]]) {
-  // CHECK-NEXT: VARIABLE DECLARATIONS
-  // CHECK-NEXT: RESULTS
+// CHECK: static iree_status_t control_flow_module_control_flow_test(iree_vm_stack_t* v1, control_flow_module_t* v2, control_flow_module_state_t* v3, int32_t [[A:[^ ]*]], int32_t [[COND:[^ ]*]], int32_t* [[RESULT:[^ ]*]]) {
+  // CHECK-NEXT: int32_t [[COND_NZ:[^ ]*]];
+  // CHECK-NEXT: bool [[COND_BOOL:[^ ]*]];
   // CHECK-NEXT: int32_t [[B:[^ ]*]];
   // CHECK-NEXT: int32_t [[V0:[^ ]*]];
-  // CHECK-NEXT: BASIC BLOCK ARGUMENTS
+  // CHECK-NEXT: iree_status_t [[STATUS:[^ ]*]];
   // CHECK-NEXT: int32_t [[C:[^ ]*]];
   // CHECK-NEXT: int32_t [[D:[^ ]*]];
   // CHECK-NEXT: int32_t [[E:[^ ]*]];
-  // CHECK-NEXT: END VARIABLE DECLARATIONS
   // CHECK-NEXT: [[BB0:[^ ]*]]:
-  // CHECK-NEXT: if ([[COND]]) {
+  // CHECK-NEXT: [[COND_NZ]] = vm_cmp_nz_i32([[COND]]);
+  // CHECK-NEXT: [[COND_BOOL]] = EMITC_CAST([[COND_NZ]], bool);
+  // CHECK-NEXT: if ([[COND_BOOL]]) {
   // CHECK-NEXT: goto [[BB1:[^ ]*]];
   // CHECK-NEXT: } else {
   // CHECK-NEXT: goto [[BB2:[^ ]*]];
@@ -44,5 +45,6 @@
   // CHECK-NEXT: goto [[BB4:[^ ]*]];
   // CHECK-NEXT: [[BB4]]:
   // CHECK-NEXT: [[V0]] = vm_add_i32([[D]], [[E]]);
-  // CHECK-NEXT: *[[RESULT]] = [[V0]];
-  // CHECK-NEXT: return iree_ok_status();
+  // CHECK-NEXT: EMITC_DEREF_ASSIGN([[RESULT]], [[V0]]);
+  // CHECK-NEXT: [[STATUS]] = iree_ok_status();
+  // CHECK-NEXT: return [[STATUS]];
diff --git a/iree/compiler/Dialect/VM/Target/C/test/global_ops.mlir b/iree/compiler/Dialect/VM/Target/C/test/global_ops.mlir
index 4becd58..a7b1e59 100644
--- a/iree/compiler/Dialect/VM/Target/C/test/global_ops.mlir
+++ b/iree/compiler/Dialect/VM/Target/C/test/global_ops.mlir
@@ -10,39 +10,39 @@
   // CHECK-NEXT: iree_vm_function_t imports[0];
   // CHECK-NEXT: };
 
-  vm.global.i32 @c42 : i32 = 42 : i32
-  vm.global.i32 mutable @c107_mut : i32 = 107 : i32
+  vm.global.i32 mutable @c42 = 42 : i32
+  vm.global.i32 mutable @c107_mut = 107 : i32
 
   // Skip forward declarations
   // CHECK: DEFINE FUNCTIONS
 
   vm.export @test_global_load_i32
-  // CHECK: static iree_status_t global_ops_test_global_load_i32_impl(
+  // CHECK: static iree_status_t global_ops_test_global_load_i32(
   vm.func @test_global_load_i32() -> i32 {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: int32_t v1;
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: v1 = vm_global_load_i32(state->rwdata, 0);
+    // CHECK-NEXT: uint8_t* v5;
+    // CHECK-NEXT: int32_t v6;
+    // CHECK-NEXT: iree_status_t v7;
+    // CHECK-NEXT: v5 = EMITC_STRUCT_PTR_MEMBER(v3, rwdata);
+    // CHECK-NEXT: v6 = vm_global_load_i32(v5, 0);
     %value = vm.global.load.i32 @c42 : i32
     vm.return %value : i32
   }
 
   vm.export @test_global_store_i32
-  // CHECK: static iree_status_t global_ops_test_global_store_i32_impl(
+  // CHECK: static iree_status_t global_ops_test_global_store_i32(
   vm.func @test_global_store_i32() -> i32 {
-    // CHECK-NEXT: VARIABLE DECLARATIONS
-    // CHECK-NEXT: RESULTS
-    // CHECK-NEXT: int32_t v1;
-    // CHECK-NEXT: int32_t v2;
-    // CHECK-NEXT: BASIC BLOCK ARGUMENTS
-    // CHECK-NEXT: END VARIABLE DECLARATIONS
-    // CHECK-NEXT: v1 = 17;
+    // CHECK-NEXT: int32_t v5;
+    // CHECK-NEXT: uint8_t* v6;
+    // CHECK-NEXT: uint8_t* v7;
+    // CHECK-NEXT: int32_t v8;
+    // CHECK-NEXT: iree_status_t v9;
+    // CHECK-NEXT: v5 = 17;
     %c17 = vm.const.i32 17 : i32
-    // CHECK-NEXT: vm_global_store_i32(state->rwdata, 4, v1);
+    // CHECK-NEXT: v6 = EMITC_STRUCT_PTR_MEMBER(v3, rwdata);
+    // CHECK-NEXT: vm_global_store_i32(v6, 4, v5);
     vm.global.store.i32 %c17, @c107_mut : i32
-    // CHECK-NEXT: v2 = vm_global_load_i32(state->rwdata, 4);
+    // CHECK-NEXT: v7 = EMITC_STRUCT_PTR_MEMBER(v3, rwdata);
+    // CHECK-NEXT: v8 = vm_global_load_i32(v7, 4);
     %value = vm.global.load.i32 @c107_mut : i32
     vm.return %value : i32
   }
diff --git a/iree/vm/BUILD b/iree/vm/BUILD
index dc4e4f7..7966acf 100644
--- a/iree/vm/BUILD
+++ b/iree/vm/BUILD
@@ -319,6 +319,13 @@
 )
 
 cc_library(
+    name = "ops_emitc",
+    hdrs = [
+        "ops_emitc.h",
+    ],
+)
+
+cc_library(
     name = "shims_emitc",
     hdrs = [
         "shims_emitc.h",
diff --git a/iree/vm/CMakeLists.txt b/iree/vm/CMakeLists.txt
index 63530da..32971cb 100644
--- a/iree/vm/CMakeLists.txt
+++ b/iree/vm/CMakeLists.txt
@@ -286,6 +286,14 @@
 
 iree_cc_library(
   NAME
+    ops_emitc
+  HDRS
+    "ops_emitc.h"
+  PUBLIC
+)
+
+iree_cc_library(
+  NAME
     shims_emitc
   HDRS
     "shims_emitc.h"
diff --git a/iree/vm/ops.h b/iree/vm/ops.h
index af6fb69..9525da8 100644
--- a/iree/vm/ops.h
+++ b/iree/vm/ops.h
@@ -136,19 +136,6 @@
 }
 
 //===------------------------------------------------------------------===//
-// Control flow ops
-//===------------------------------------------------------------------===//
-
-static inline iree_status_t vm_fail_or_ok(int32_t status_code,
-                                          iree_string_view_t message) {
-  if (status_code != 0) {
-    return iree_status_allocate(IREE_STATUS_FAILED_PRECONDITION, "<vm>", 0,
-                                message);
-  }
-  return iree_ok_status();
-}
-
-//===------------------------------------------------------------------===//
 // ExtI64: Globals
 //===------------------------------------------------------------------===//
 
@@ -364,39 +351,4 @@
   return isnan(operand) ? 1 : 0;
 }
 
-//===------------------------------------------------------------------===//
-// Utility macros (Used for things that EmitC can't handle)
-//===------------------------------------------------------------------===//
-
-// Get the address of an array element
-#define VM_ARRAY_ELEMENT_ADDRESS(array, index) &array[index]
-
-// Release all refs from the given array
-#define VM_REF_ARRAY_RELEASE(array)                          \
-  for (int i = 0; i < IREE_ARRAYSIZE(array); i++) {          \
-    iree_vm_ref_release(VM_ARRAY_ELEMENT_ADDRESS(array, i)); \
-  }
-
-#define VM_REF_RELEASE_IF_TYPE_MISMATCH(ref, type_def) \
-  if (ref->type != IREE_VM_REF_TYPE_NULL &&            \
-      (iree_vm_type_def_is_value(type_def) ||          \
-       ref->type != type_def->ref_type)) {             \
-    iree_vm_ref_release(ref);                          \
-  }
-
-// TODO(simon-camp): This macro should resemble the error handling part of the
-// IREE_RETURN_IF_ERROR macro. There are two different definitions in
-// iree/base/api.h depending on a feature flag.
-#define VM_RETURN_IF_ERROR(status, array) \
-  if (status) {                           \
-    VM_REF_ARRAY_RELEASE(array);          \
-    return status;                        \
-  }
-
-#define VM_RETURN_IF_LIST_NULL(list, array)                \
-  if (!list) {                                             \
-    VM_REF_ARRAY_RELEASE(array);                           \
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT); \
-  }
-
 #endif  // IREE_VM_OPS_H_
diff --git a/iree/vm/ops_emitc.h b/iree/vm/ops_emitc.h
new file mode 100644
index 0000000..77134a2
--- /dev/null
+++ b/iree/vm/ops_emitc.h
@@ -0,0 +1,35 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_OPS_EMITC_H_
+#define IREE_VM_OPS_EMITC_H_
+
+// This file contains utility functions and macros used for things that EmitC
+// can't handle directly.
+
+// Assign a value through a pointer variable (usable as a full expression)
+#define EMITC_DEREF_ASSIGN(ptr, value) (*(ptr) = (value))
+
+// Access a member of a struct; the result is still an lvalue
+#define EMITC_STRUCT_MEMBER(obj, member) ((obj).member)
+
+// Access a member through a pointer to a struct; the result is still an lvalue
+#define EMITC_STRUCT_PTR_MEMBER(ptr, member) ((ptr)->member)
+
+// Get the address of an array element; parenthesized so `->` binds correctly
+#define EMITC_ARRAY_ELEMENT_ADDRESS(array, index) (&(array)[(index)])
+
+// Unary operations
+#define EMITC_CAST(arg, type) ((type)(arg))
+#define EMITC_NOT(arg) (!(arg))
+
+// Binary operations
+#define EMITC_AND(lhs, rhs) ((lhs) && (rhs))
+#define EMITC_EQ(lhs, rhs) ((lhs) == (rhs))
+#define EMITC_NE(lhs, rhs) ((lhs) != (rhs))
+#define EMITC_OR(lhs, rhs) ((lhs) || (rhs))
+
+#endif  // IREE_VM_OPS_EMITC_H_
diff --git a/iree/vm/test/call_ops.mlir b/iree/vm/test/call_ops.mlir
index fa9800e..9909935 100644
--- a/iree/vm/test/call_ops.mlir
+++ b/iree/vm/test/call_ops.mlir
@@ -38,9 +38,8 @@
     vm.return
   }
 
-  // TODO(simon-camp): The EmitC conversion doesn't support multiple return values.
-  vm.export @test_call_v_ii attributes {emitc.exclude}
-  vm.func private @test_call_v_ii() {
+  vm.export @test_call_v_ii
+  vm.func @test_call_v_ii() {
     %c1 = vm.const.i32 1 : i32
     %c2 = vm.const.i32 2 : i32
     %0:2 = vm.call @_v_ii() : () -> (i32, i32)
diff --git a/iree/vm/test/emitc/CMakeLists.txt b/iree/vm/test/emitc/CMakeLists.txt
index 702ea1e..c2a2073 100644
--- a/iree/vm/test/emitc/CMakeLists.txt
+++ b/iree/vm/test/emitc/CMakeLists.txt
@@ -20,6 +20,7 @@
     iree::testing::gtest_main
     iree::vm::cc
     iree::vm::ops
+    iree::vm::ops_emitc
     iree::vm::shims_emitc
     ::arithmetic_ops
     ::arithmetic_ops_f32