Merge pull request #3564 from rsuderman:main-to-google

PiperOrigin-RevId: 338296154
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d12969b..b2c30b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,7 @@
 option(IREE_BUILD_SAMPLES "Builds IREE sample projects." ON)
 option(IREE_BUILD_DEBUGGER "Builds the IREE debugger app." OFF)
 option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" OFF)
+option(IREE_BUILD_JAVA_BINDINGS "Builds the IREE java bindings." OFF)
 option(IREE_BUILD_EXPERIMENTAL "Builds experimental projects." OFF)
 
 set(IREE_HAL_DRIVERS_TO_BUILD "all"
@@ -495,6 +496,11 @@
   add_subdirectory(bindings/python)
 endif()
 
+if(${IREE_BUILD_JAVA_BINDINGS})
+  add_subdirectory(bindings/java)
+  add_subdirectory(bindings/javatests)
+endif()
+
 add_subdirectory(iree/tools)
 
 if(${IREE_BUILD_SAMPLES})
diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt
new file mode 100644
index 0000000..3f263df
--- /dev/null
+++ b/bindings/java/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(com/google/iree/native)
diff --git a/bindings/java/com/google/iree/native/CMakeLists.txt b/bindings/java/com/google/iree/native/CMakeLists.txt
new file mode 100644
index 0000000..e749258
--- /dev/null
+++ b/bindings/java/com/google/iree/native/CMakeLists.txt
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+iree_cc_library(
+  NAME
+    cc_wrappers
+  SRCS
+    "context_wrapper.cc"
+    "function_wrapper.cc"
+    "instance_wrapper.cc"
+    "module_wrapper.cc"
+  HDRS
+    "context_wrapper.h"
+    "function_wrapper.h"
+    "instance_wrapper.h"
+    "module_wrapper.h"
+  DEPS
+    iree::base::api
+    iree::base::init
+    iree::base::logging
+    iree::base::status
+    iree::hal::api
+    iree::hal::vmla::vmla_driver_module
+    iree::modules::hal
+    iree::modules::strings::strings_module
+    iree::modules::tensorlist::native_module
+    iree::vm::bytecode_module
+    iree::vm::context
+    iree::vm::instance
+    iree::vm::ref_cc
+)
diff --git a/bindings/java/com/google/iree/native/instance_wrapper.cc b/bindings/java/com/google/iree/native/instance_wrapper.cc
index adcee70..cdd9fb0 100644
--- a/bindings/java/com/google/iree/native/instance_wrapper.cc
+++ b/bindings/java/com/google/iree/native/instance_wrapper.cc
@@ -14,6 +14,8 @@
 
 #include "bindings/java/com/google/iree/native/instance_wrapper.h"
 
+#include <mutex>
+
 #include "iree/base/init.h"
 #include "iree/modules/hal/hal_module.h"
 #include "iree/modules/strings/strings_module.h"
diff --git a/bindings/javatests/CMakeLists.txt b/bindings/javatests/CMakeLists.txt
new file mode 100644
index 0000000..268f43b
--- /dev/null
+++ b/bindings/javatests/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(com/google/iree)
diff --git a/bindings/javatests/com/google/iree/CMakeLists.txt b/bindings/javatests/com/google/iree/CMakeLists.txt
new file mode 100644
index 0000000..15f9896
--- /dev/null
+++ b/bindings/javatests/com/google/iree/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+iree_bytecode_module(
+  NAME
+    simple_mul_bytecode_module
+  SRC
+    "simple_mul.mlir"
+  CC_NAMESPACE
+    "iree::java"
+  FLAGS
+    "-iree-mlir-to-vm-bytecode-module"
+    "-iree-hal-target-backends=vmla"
+)
+
+iree_cc_binary(
+  NAME
+    integration_test
+  SRCS
+    "integration_test.cc"
+  DEPS
+    bindings::java::com::google::iree::native::cc_wrappers
+    bindings::javatests::com::google::iree::simple_mul_bytecode_module_cc
+    iree::base::status
+)
diff --git a/build_tools/cmake/iree_cross_compile.cmake b/build_tools/cmake/iree_cross_compile.cmake
index b52b4d2..b8c1fde 100644
--- a/build_tools/cmake/iree_cross_compile.cmake
+++ b/build_tools/cmake/iree_cross_compile.cmake
@@ -85,6 +85,7 @@
   iree_to_bool(_CONFIG_BUILD_SAMPLES "${IREE_${CONFIG_NAME}_BUILD_SAMPLES}")
   iree_to_bool(_CONFIG_BUILD_DEBUGGER "${IREE_${CONFIG_NAME}_BUILD_DEBUGGER}")
   iree_to_bool(_CONFIG_BUILD_PYTHON_BINDINGS "${IREE_${CONFIG_NAME}_BUILD_PYTHON_BINDINGS}")
+  iree_to_bool(_CONFIG_BUILD_JAVA_BINDINGS "${IREE_${CONFIG_NAME}_BUILD_JAVA_BINDINGS}")
   iree_to_bool(_CONFIG_BUILD_EXPERIMENTAL "${IREE_${CONFIG_NAME}_BUILD_EXPERIMENTAL}")
 
   # Escape semicolons in the targets list so that CMake doesn't expand them to
@@ -115,6 +116,7 @@
         -DIREE_BUILD_SAMPLES=${_CONFIG_BUILD_SAMPLES}
         -DIREE_BUILD_DEBUGGER=${_CONFIG_BUILD_DEBUGGER}
         -DIREE_BUILD_PYTHON_BINDINGS=${_CONFIG_BUILD_PYTHON_BINDINGS}
+        -DIREE_BUILD_JAVA_BINDINGS=${_CONFIG_BUILD_JAVA_BINDINGS}
         -DIREE_BUILD_EXPERIMENTAL=${_CONFIG_BUILD_EXPERIMENTAL}
         # LINT.ThenChange(
         #   https://github.com/google/iree/tree/main/CMakeLists.txt:iree_options,
diff --git a/docs/get_started/cmake_options_and_variables.md b/docs/get_started/cmake_options_and_variables.md
index 9886149..18fff58 100644
--- a/docs/get_started/cmake_options_and_variables.md
+++ b/docs/get_started/cmake_options_and_variables.md
@@ -56,6 +56,10 @@
 
 Builds the IREE python bindings. Defaults to `OFF`.
 
+#### `IREE_BUILD_JAVA_BINDINGS`:BOOL
+
+Builds the IREE Java bindings. Defaults to `OFF`.
+
 #### `IREE_BUILD_EXPERIMENTAL`:BOOL
 
 Builds experimental projects. Defaults to `OFF`.
diff --git a/integrations/tensorflow/e2e/BUILD b/integrations/tensorflow/e2e/BUILD
index 97ede05..ff88406 100644
--- a/integrations/tensorflow/e2e/BUILD
+++ b/integrations/tensorflow/e2e/BUILD
@@ -67,6 +67,7 @@
     "einsum_dynamic_test.py",
     "einsum_static_test.py",
     "einsum_vector_test.py",
+    "fft_test.py",
     "finite_test.py",
     "gather_test.py",
     "mandelbrot_test.py",
@@ -84,6 +85,7 @@
     "einsum_dynamic_test.py",
     "einsum_static_test.py",
     "einsum_vector_test.py",
+    "fft_test.py",  # TODO(natashaknk): Get this working after kernel is in.
     "mandelbrot_test.py",  # TODO(silvasean): Get this working on IREE.
     "ring_buffer_test.py",  # TODO(b/148747011)
     "strings_test.py",
@@ -98,6 +100,7 @@
     "einsum_dynamic_test.py",
     "einsum_static_test.py",
     "einsum_vector_test.py",
+    "fft_test.py",  # TODO(natashaknk): Get this working after kernel is in.
     "fill_test.py",  # TODO(jennik): Get this test working on IREE.
     "logical_ops_test.py",
     "mandelbrot_test.py",  # TODO(silvasean): Get this working on IREE.
@@ -118,6 +121,7 @@
     "einsum_dynamic_test.py",
     "einsum_static_test.py",
     "einsum_vector_test.py",
+    "fft_test.py",  # TODO(natashaknk): Get this working after kernel is in.
     "fill_test.py",  # TODO(jennik): Get this test working on IREE.
     "logical_ops_test.py",
     "mandelbrot_test.py",  # TODO(silvasean): Get this working on IREE.
diff --git a/integrations/tensorflow/e2e/fft_test.py b/integrations/tensorflow/e2e/fft_test.py
new file mode 100644
index 0000000..590bff2
--- /dev/null
+++ b/integrations/tensorflow/e2e/fft_test.py
@@ -0,0 +1,75 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from absl import app
+import numpy as np
+from pyiree.tf.support import tf_test_utils
+import tensorflow.compat.v2 as tf
+
+
+class FftModule(tf.Module):
+  # TODO(natashaknk): Merge into one test once multiple outputs are supported.
+  @tf.function(input_signature=[
+      tf.TensorSpec([4], tf.float32),
+      tf.TensorSpec([4], tf.float32)
+  ])
+  def fft_real(self, real_array, imag_array):
+    complex_in = tf.complex(real_array, imag_array)
+    complex_out = tf.signal.fft(complex_in)
+    return tf.math.real(complex_out)
+
+  @tf.function(input_signature=[
+      tf.TensorSpec([4], tf.float32),
+      tf.TensorSpec([4], tf.float32)
+  ])
+  def fft_imag(self, real_array, imag_array):
+    complex_in = tf.complex(real_array, imag_array)
+    complex_out = tf.signal.fft(complex_in)
+    return tf.math.imag(complex_out)
+
+
+class FftTest(tf_test_utils.TracedModuleTestCase):
+
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    self._modules = tf_test_utils.compile_tf_module(FftModule)
+
+  def test_fft_real(self):
+
+    def fft_real(module):
+      real_array = np.array([9., 1., 4.5, -0.3], dtype=np.float32)
+      imag_array = np.array([0., -1., 17.7, 10.], dtype=np.float32)
+      module.fft_real(real_array, imag_array)
+
+    self.compare_backends(fft_real, self._modules)
+
+  def test_fft_imag(self):
+
+    def fft_imag(module):
+      real_array = np.array([9., 1., 4.5, -0.3], dtype=np.float32)
+      imag_array = np.array([0., -1., 17.7, 10.], dtype=np.float32)
+      module.fft_imag(real_array, imag_array)
+
+    self.compare_backends(fft_imag, self._modules)
+
+
+def main(argv):
+  del argv  # Unused
+  if hasattr(tf, 'enable_v2_behavior'):
+    tf.enable_v2_behavior()
+  tf.test.main()
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/iree/base/api.h b/iree/base/api.h
index d733293..61df05c 100644
--- a/iree/base/api.h
+++ b/iree/base/api.h
@@ -540,6 +540,13 @@
   if (IREE_UNLIKELY(var)) {                                                  \
     return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__);              \
   }
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, ...)  \
+  iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_(                           \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_GET_EXPR_)(__VA_ARGS__))); \
+  if (IREE_UNLIKELY(var)) {                                                  \
+    (tail_expr);                                                             \
+    return IREE_STATUS_IMPL_ANNOTATE_SWITCH_(var, __VA_ARGS__);              \
+  }
 #define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
   iree_status_t var = (expr);                     \
   if (IREE_UNLIKELY(var)) iree_status_ignore(var);
@@ -552,6 +559,14 @@
 #define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, expr, ...) \
   iree_status_t var = (expr);                                 \
   if (IREE_UNLIKELY(var)) return var;
+#undef IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_
+#define IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(tail_expr, var, expr, \
+                                                       ...)                  \
+  iree_status_t var = (expr);                                                \
+  if (IREE_UNLIKELY(var)) {                                                  \
+    (tail_expr);                                                             \
+    return var;                                                              \
+  }
 #undef IREE_STATUS_IMPL_IGNORE_ERROR_
 #define IREE_STATUS_IMPL_IGNORE_ERROR_(var, expr) \
   iree_status_t var = (expr);                     \
@@ -593,6 +608,12 @@
       IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
       IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
 
+// IREE_RETURN_IF_ERROR with a custom expression to evaluate before returning.
+#define IREE_RETURN_AND_EVAL_IF_ERROR(tail_expr, ...)              \
+  IREE_STATUS_IMPL_RETURN_AND_EVAL_IF_API_ERROR_(                  \
+      tail_expr, IREE_STATUS_IMPL_CONCAT_(__status_, __COUNTER__), \
+      IREE_STATUS_IMPL_IDENTITY_(IREE_STATUS_IMPL_IDENTITY_(__VA_ARGS__)))
+
 // Ignores the status result of (expr) regardless of its value.
 //
 // Example:
diff --git a/iree/base/flatbuffer_util.cc b/iree/base/flatbuffer_util.cc
index 379be73..117aea2 100644
--- a/iree/base/flatbuffer_util.cc
+++ b/iree/base/flatbuffer_util.cc
@@ -66,8 +66,7 @@
                                       std::function<void()> deleter,
                                       size_t root_type_size,
                                       VerifierFn verifier_fn) {
-  IREE_TRACE_SCOPE("FlatBufferFileBase::FromBuffer:size", int)
-  (static_cast<int>(buffer_data.size()));
+  IREE_TRACE_SCOPE();
 
   // Sanity check buffer for the minimum size as FlatBuffers doesn't.
   if (buffer_data.size() < 16) {
diff --git a/iree/base/tracing.h b/iree/base/tracing.h
index c8594de..a3226ad 100644
--- a/iree/base/tracing.h
+++ b/iree/base/tracing.h
@@ -263,12 +263,12 @@
 
 // Begins a new zone with the given runtime dynamic string name.
 // The |value| string will be copied into the trace buffer.
-#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length)   \
-  static const iree_tracing_location_t TracyConcat(                       \
-      __tracy_source_location, __LINE__) = {NULL, __FUNCTION__, __FILE__, \
-                                            (uint32_t)__LINE__, 0};       \
-  iree_zone_id_t zone_id = iree_tracing_zone_begin_impl(                  \
-      &TracyConcat(__tracy_source_location, __LINE__), name, name_length);
+#define IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(zone_id, name, name_length) \
+  static const iree_tracing_location_t TracyConcat(                     \
+      __tracy_source_location, __LINE__) = {0, __FUNCTION__, __FILE__,  \
+                                            (uint32_t)__LINE__, 0};     \
+  iree_zone_id_t zone_id = iree_tracing_zone_begin_impl(                \
+      &TracyConcat(__tracy_source_location, __LINE__), (name), (name_length));
 
 // Begins an externally defined zone with a dynamic source location.
 // The |file_name|, |function_name|, and optional |name| strings will be copied
@@ -280,6 +280,10 @@
       file_name, file_name_length, line, function_name, function_name_length, \
       name, name_length)
 
+// Appends an integer value to the parent zone. May be called multiple times.
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value) \
+  ___tracy_emit_zone_value((struct ___tracy_c_zone_context){zone_id, 1}, value);
+
 // Appends a string value to the parent zone. May be called multiple times.
 // The |value| string will be copied into the trace buffer.
 #define IREE_TRACE_ZONE_APPEND_TEXT(...)                                  \
@@ -297,6 +301,11 @@
 #define IREE_TRACE_ZONE_END(zone_id) \
   ___tracy_emit_zone_end((struct ___tracy_c_zone_context){zone_id, 1})
 
+// Ends the current zone before returning on a failure.
+// Sugar for IREE_TRACE_ZONE_END+IREE_RETURN_IF_ERROR.
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+  IREE_RETURN_AND_EVAL_IF_ERROR(IREE_TRACE_ZONE_END(zone_id), __VA_ARGS__)
+
 // Configures the named plot with an IREE_TRACING_PLOT_TYPE_* representation.
 #define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type) \
   iree_tracing_set_plot_type_impl(name_literal, plot_type)
@@ -347,8 +356,11 @@
 #define IREE_TRACE_ZONE_BEGIN_EXTERNAL(                        \
     zone_id, file_name, file_name_length, line, function_name, \
     function_name_length, name, name_length)
+#define IREE_TRACE_ZONE_APPEND_VALUE(zone_id, value)
 #define IREE_TRACE_ZONE_APPEND_TEXT(zone_id, value, value_length)
 #define IREE_TRACE_ZONE_END(zone_id)
+#define IREE_RETURN_AND_END_ZONE_IF_ERROR(zone_id, ...) \
+  IREE_RETURN_IF_ERROR(__VA_ARGS__)
 #define IREE_TRACE_SET_PLOT_TYPE(name_literal, plot_type)
 #define IREE_TRACE_PLOT_VALUE_I64(name_literal, value)
 #define IREE_TRACE_PLOT_VALUE_F32(name_literal, value)
@@ -411,17 +423,20 @@
 #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 
 // TODO(#1886): update these to tracy and drop the 0.
-#define IREE_TRACE_SCOPE0(name_spec) ZoneScopedN(name_spec)
-#define IREE_TRACE_SCOPE(name_spec, ...)
-#define IREE_TRACE_EVENT0
+#define IREE_TRACE_SCOPE() ZoneScoped
+#define IREE_TRACE_SCOPE_DYNAMIC(name_cstr) \
+  ZoneTransientN(___tracy_scoped_zone, name_cstr, true)
+#define IREE_TRACE_SCOPE0(name_literal) ZoneScopedN(name_literal)
 #define IREE_TRACE_EVENT
+#define IREE_TRACE_EVENT0
 
 #else
 #define IREE_TRACE_THREAD_ENABLE(name)
-#define IREE_TRACE_SCOPE0(name_spec)
-#define IREE_TRACE_SCOPE(name_spec, ...) (void)
-#define IREE_TRACE_EVENT0
+#define IREE_TRACE_SCOPE()
+#define IREE_TRACE_SCOPE_DYNAMIC(name_cstr)
+#define IREE_TRACE_SCOPE0(name_literal)
 #define IREE_TRACE_EVENT(void)
+#define IREE_TRACE_EVENT0
 #endif  // IREE_TRACING_FEATURE_INSTRUMENTATION
 
 // TODO(benvanik): macros for LockableCtx / Lockable mutex tracking.
diff --git a/iree/compiler/Dialect/VMLA/Conversion/VMLAToVM/ConvertVMLAToVM.cpp b/iree/compiler/Dialect/VMLA/Conversion/VMLAToVM/ConvertVMLAToVM.cpp
index 895a8e5..ef9238e 100644
--- a/iree/compiler/Dialect/VMLA/Conversion/VMLAToVM/ConvertVMLAToVM.cpp
+++ b/iree/compiler/Dialect/VMLA/Conversion/VMLAToVM/ConvertVMLAToVM.cpp
@@ -267,6 +267,16 @@
            getTypedTypeStr(op.dst_type());
   }
 };
+
+class VMLAFftImportOpConversion
+    : public VMLAImportOpConversion<IREE::VMLA::FftOp> {
+ public:
+  using VMLAImportOpConversion<IREE::VMLA::FftOp>::VMLAImportOpConversion;
+
+  std::string getImportSuffix(IREE::VMLA::FftOp op) const override {
+    return std::string(".") + getTypedTypeStr(op.real_element_type());
+  }
+};
 }  // namespace
 
 void populateVMLAToVMPatterns(MLIRContext *context,
@@ -343,6 +353,8 @@
       context, importSymbols, typeConverter, "vmla.batch.matmul");
   patterns.insert<VMLAConvImportOpConversion>(context, importSymbols,
                                               typeConverter, "vmla.conv");
+  patterns.insert<VMLAFftImportOpConversion>(context, importSymbols,
+                                             typeConverter, "vmla.fft");
 
   VMLA_TYPED_IMPORT_OP(IREE::VMLA::ReduceSumOp, "vmla.reduce.sum");
   VMLA_TYPED_IMPORT_OP(IREE::VMLA::ReduceMinOp, "vmla.reduce.min");
diff --git a/iree/compiler/Dialect/VMLA/vmla.imports.mlir b/iree/compiler/Dialect/VMLA/vmla.imports.mlir
index ff575b0..2302fe1 100644
--- a/iree/compiler/Dialect/VMLA/vmla.imports.mlir
+++ b/iree/compiler/Dialect/VMLA/vmla.imports.mlir
@@ -347,6 +347,12 @@
   %src : !vm.ref<!vmla.buffer>, %src_shape : i32 ...,
   %dst : !vm.ref<!vmla.buffer>)
 
+vm.import @fft.f32(
+  %real_src : !vm.ref<!vmla.buffer>, %real_src_shape : i32 ...,
+  %imag_src : !vm.ref<!vmla.buffer>, %imag_src_shape : i32 ...,
+  %real_dst : !vm.ref<!vmla.buffer>,
+  %imag_dst : !vm.ref<!vmla.buffer>)
+
 //===----------------------------------------------------------------------===//
 // VMLA Ops: conversion
 //===----------------------------------------------------------------------===//
diff --git a/iree/hal/device_manager.cc b/iree/hal/device_manager.cc
index d3c3301..5a527f3 100644
--- a/iree/hal/device_manager.cc
+++ b/iree/hal/device_manager.cc
@@ -108,8 +108,7 @@
     MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
     device_size_t allocation_size,
     absl::Span<const DevicePlacement> device_placements) {
-  IREE_TRACE_SCOPE("DeviceManager::TryAllocateDeviceVisibleBuffer:size", int)
-  (static_cast<int>(allocation_size));
+  IREE_TRACE_SCOPE0("DeviceManager::TryAllocateDeviceVisibleBuffer:size");
   if (!AnyBitSet(memory_type & MemoryType::kHostLocal)) {
     return InvalidArgumentErrorBuilder(IREE_LOC)
            << "Host-local buffers require the kHostLocal bit: "
@@ -138,8 +137,7 @@
     MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
     device_size_t allocation_size,
     absl::Span<const DevicePlacement> device_placements) {
-  IREE_TRACE_SCOPE("DeviceManager::AllocateDeviceVisibleBuffer:size", int)
-  (static_cast<int>(allocation_size));
+  IREE_TRACE_SCOPE0("DeviceManager::AllocateDeviceVisibleBuffer:size");
   if (!AnyBitSet(memory_type & MemoryType::kHostLocal)) {
     return InvalidArgumentErrorBuilder(IREE_LOC)
            << "Host-local buffers require the kHostLocal bit: "
@@ -160,8 +158,7 @@
     MemoryTypeBitfield memory_type, BufferUsageBitfield buffer_usage,
     device_size_t allocation_size,
     absl::Span<const DevicePlacement> device_placements) {
-  IREE_TRACE_SCOPE("DeviceManager::AllocateDeviceLocalBuffer:size", int)
-  (static_cast<int>(allocation_size));
+  IREE_TRACE_SCOPE0("DeviceManager::AllocateDeviceLocalBuffer:size");
   if (!AnyBitSet(memory_type & MemoryType::kDeviceLocal)) {
     return InvalidArgumentErrorBuilder(IREE_LOC)
            << "Device-local buffers require the kDeviceLocal bit: "
diff --git a/iree/hal/dylib/dylib_executable.cc b/iree/hal/dylib/dylib_executable.cc
index def1786..10dbf29 100644
--- a/iree/hal/dylib/dylib_executable.cc
+++ b/iree/hal/dylib/dylib_executable.cc
@@ -16,7 +16,6 @@
 
 #include "flatbuffers/flatbuffers.h"
 #include "iree/base/file_io.h"
-#include "iree/base/tracing.h"
 #include "iree/schemas/dylib_executable_def_generated.h"
 
 namespace iree {
@@ -89,6 +88,10 @@
              << "Could not find symbol: " << entry_points[i];
     }
     entry_functions_[i] = symbol;
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+    entry_names_.resize(entry_functions_.size());  // Size before indexing.
+    entry_names_[i] = entry_points[i]->c_str();
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
   }
 
   return OkStatus();
@@ -96,6 +99,11 @@
 
 struct DyLibDispatchState : public HostExecutable::DispatchState {
   DyLibDispatchState() = default;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  const char* entry_name = nullptr;
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
   void* entry_function = nullptr;
   absl::InlinedVector<void*, 4> args;
   absl::InlinedVector<int32_t, 4> push_constant;
@@ -111,6 +119,9 @@
   }
 
   auto dispatch_state = make_ref<DyLibDispatchState>();
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  dispatch_state->entry_name = entry_names_[params.entry_point];
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
   dispatch_state->entry_function = entry_functions_[params.entry_point];
 
   for (size_t set = 0; set < params.set_bindings.size(); ++set) {
@@ -135,8 +146,8 @@
 
 Status DyLibExecutable::DispatchTile(DispatchState* state,
                                      std::array<uint32_t, 3> workgroup_xyz) {
-  IREE_TRACE_SCOPE0("DyLibExecutable::DispatchTile");
   auto* dispatch_state = static_cast<DyLibDispatchState*>(state);
+  IREE_TRACE_SCOPE_DYNAMIC(dispatch_state->entry_name);
 
   auto entry_function =
       (void (*)(void**, int32_t*))dispatch_state->entry_function;
diff --git a/iree/hal/dylib/dylib_executable.h b/iree/hal/dylib/dylib_executable.h
index 63c4a26..210be15 100644
--- a/iree/hal/dylib/dylib_executable.h
+++ b/iree/hal/dylib/dylib_executable.h
@@ -21,6 +21,7 @@
 #include "absl/container/inlined_vector.h"
 #include "iree/base/dynamic_library.h"
 #include "iree/base/status.h"
+#include "iree/base/tracing.h"
 #include "iree/hal/executable_spec.h"
 #include "iree/hal/host/host_executable.h"
 
@@ -50,6 +51,10 @@
   std::string executable_library_temp_path_;
   std::unique_ptr<DynamicLibrary> executable_library_;
   absl::InlinedVector<void*, 4> entry_functions_;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  absl::InlinedVector<const char*, 4> entry_names_;
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 };
 
 }  // namespace dylib
diff --git a/iree/hal/host/BUILD b/iree/hal/host/BUILD
index c7b28aa..a26fa0f 100644
--- a/iree/hal/host/BUILD
+++ b/iree/hal/host/BUILD
@@ -55,6 +55,7 @@
     deps = [
         "//iree/base:logging",
         "//iree/base:status",
+        "//iree/base:tracing",
         "//iree/hal:buffer",
     ],
 )
diff --git a/iree/hal/host/CMakeLists.txt b/iree/hal/host/CMakeLists.txt
index 803d7b6..953a777 100644
--- a/iree/hal/host/CMakeLists.txt
+++ b/iree/hal/host/CMakeLists.txt
@@ -55,6 +55,7 @@
   DEPS
     iree::base::logging
     iree::base::status
+    iree::base::tracing
     iree::hal::buffer
   PUBLIC
 )
diff --git a/iree/hal/host/host_buffer.cc b/iree/hal/host/host_buffer.cc
index c5016f3..265e048 100644
--- a/iree/hal/host/host_buffer.cc
+++ b/iree/hal/host/host_buffer.cc
@@ -20,6 +20,7 @@
 
 #include "iree/base/logging.h"
 #include "iree/base/status.h"
+#include "iree/base/tracing.h"
 
 namespace iree {
 namespace hal {
@@ -36,6 +37,7 @@
       owns_data_(owns_data) {}
 
 HostBuffer::~HostBuffer() {
+  IREE_TRACE_SCOPE();
   if (owns_data_ && data_) {
     std::free(data_);
     data_ = nullptr;
diff --git a/iree/hal/vmla/op_kernels.h b/iree/hal/vmla/op_kernels.h
index ba5b8bc..1d84e46 100644
--- a/iree/hal/vmla/op_kernels.h
+++ b/iree/hal/vmla/op_kernels.h
@@ -174,6 +174,15 @@
                         absl::Span<int32_t> dst_buffer, ShapeSpan src_shape);
 };
 
+struct Fft {
+  template <typename T>
+  static Status Execute(absl::Span<const T> real_src_buffer,
+                        absl::Span<const T> imag_src_buffer,
+                        absl::Span<T> real_dst_buffer,
+                        absl::Span<T> imag_dst_buffer, ShapeSpan real_src_shape,
+                        ShapeSpan imag_src_shape);
+};
+
 struct Broadcast {
   template <typename T>
   static Status Execute(absl::Span<const T> src_buffer,
diff --git a/iree/hal/vmla/op_kernels_generic.h b/iree/hal/vmla/op_kernels_generic.h
index 0b4f904..166a2b8 100644
--- a/iree/hal/vmla/op_kernels_generic.h
+++ b/iree/hal/vmla/op_kernels_generic.h
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cmath>
 #include <iostream>
+#include <iterator>
 #include <numeric>
 
 #include "absl/container/flat_hash_set.h"
@@ -541,6 +542,18 @@
 }
 
 template <typename T>
+Status Fft::Execute(absl::Span<const T> real_src_buffer,
+                    absl::Span<const T> imag_src_buffer,
+                    absl::Span<T> real_dst_buffer,
+                    absl::Span<T> imag_dst_buffer, ShapeSpan real_src_shape,
+                    ShapeSpan imag_src_shape) {
+  // TODO(natashaknk): Implement; stub ignores inputs, fills real=1 and imag=2.
+  std::fill(real_dst_buffer.begin(), real_dst_buffer.end(), 1);
+  std::fill(imag_dst_buffer.begin(), imag_dst_buffer.end(), 2);
+  return OkStatus();
+}
+
+template <typename T>
 Status Broadcast::Execute(absl::Span<const T> src_buffer,
                           absl::Span<T> dst_buffer) {
   for (size_t i = 0; i < dst_buffer.size(); ++i) {
diff --git a/iree/hal/vmla/vmla_executable.cc b/iree/hal/vmla/vmla_executable.cc
index 336c610..4cc447a 100644
--- a/iree/hal/vmla/vmla_executable.cc
+++ b/iree/hal/vmla/vmla_executable.cc
@@ -156,8 +156,9 @@
 
 Status VMLAExecutable::DispatchTile(DispatchState* state,
                                     std::array<uint32_t, 3> workgroup_xyz) {
-  IREE_TRACE_SCOPE0("VMLAExecutable::DispatchTile");
   auto* dispatch_state = static_cast<VMLADispatchState*>(state);
+  IREE_TRACE_SCOPE_DYNAMIC(
+      iree_vm_function_name(&dispatch_state->function).data);
 
   auto* input_list_storage = alloca(dispatch_state->input_list_size);
   iree_vm_list_t* input_list = nullptr;
diff --git a/iree/hal/vmla/vmla_module.cc b/iree/hal/vmla/vmla_module.cc
index 5852de0..1d791c0 100644
--- a/iree/hal/vmla/vmla_module.cc
+++ b/iree/hal/vmla/vmla_module.cc
@@ -655,6 +655,19 @@
   IREE_VMLA_SORT_OP(SortI32, int32_t);
   IREE_VMLA_SORT_OP(SortF32, float);
 
+  Status FftF32(const vm::ref<Buffer>& real_src,
+                iree_vmla_shape_t real_src_shape,
+                const vm::ref<Buffer>& imag_src,
+                iree_vmla_shape_t imag_src_shape,
+                const vm::ref<Buffer>& real_dst,
+                const vm::ref<Buffer>& imag_dst) {
+    IREE_TRACE_SCOPE0("VMLAModuleState::FftF32");
+    IREE_RETURN_IF_ERROR(kernels::Fft::Execute<float>(
+        real_src->As<float>(), imag_src->As<float>(), real_dst->As<float>(),
+        imag_dst->As<float>(), real_src_shape, imag_src_shape));
+    return OkStatus();
+  }
+
   //===--------------------------------------------------------------------===//
   // VMLA Ops: conversion
   //===--------------------------------------------------------------------===//
@@ -987,6 +1000,7 @@
     vm::MakeNativeFunction("sort.i16", &VMLAModuleState::SortI16),
     vm::MakeNativeFunction("sort.i32", &VMLAModuleState::SortI32),
     vm::MakeNativeFunction("sort.f32", &VMLAModuleState::SortF32),
+    vm::MakeNativeFunction("fft.f32", &VMLAModuleState::FftF32),
     vm::MakeNativeFunction("finite.f32", &VMLAModuleState::FiniteF32),
 
     vm::MakeNativeFunction("convert.i8.i16", &VMLAModuleState::ConvertI8I16),
diff --git a/iree/modules/hal/hal_module.cc b/iree/modules/hal/hal_module.cc
index c990cf0..be8a730 100644
--- a/iree/modules/hal/hal_module.cc
+++ b/iree/modules/hal/hal_module.cc
@@ -156,10 +156,13 @@
     IREE_RETURN_IF_ERROR(iree_hal_semaphore_wait_with_deadline(
         semaphore.get(), 1ull, IREE_TIME_INFINITE_FUTURE));
 
-    for (auto& ref : deferred_releases_) {
-      iree_vm_ref_release(&ref);
+    {
+      IREE_TRACE_SCOPE0("HALModuleState::DeferredReleases");
+      for (auto& ref : deferred_releases_) {
+        iree_vm_ref_release(&ref);
+      }
+      deferred_releases_.clear();
     }
-    deferred_releases_.clear();
 
     return OkStatus();
   }
diff --git a/iree/tools/iree-benchmark-module-main.cc b/iree/tools/iree-benchmark-module-main.cc
index 96dc0c9..af0ab9f 100644
--- a/iree/tools/iree-benchmark-module-main.cc
+++ b/iree/tools/iree-benchmark-module-main.cc
@@ -59,38 +59,39 @@
 namespace iree {
 namespace {
 
+// Invokes |function| in |context| once per iteration of |state|.
+static void BenchmarkFunction(
+    const std::string& benchmark_name, iree_vm_context_t* context,
+    iree_vm_function_t function, iree_vm_list_t* inputs,
+    const std::vector<RawSignatureParser::Description>& output_descs,
+    benchmark::State& state) {
+  IREE_TRACE_SCOPE_DYNAMIC(benchmark_name.c_str());
+  IREE_TRACE_FRAME_MARK();
+  // Every pass creates a fresh list (element_type null, sized from the
+  // output descriptions) and then performs a single invocation.
+  for (auto _ : state) {
+    IREE_TRACE_SCOPE0("BenchmarkIteration");
+    IREE_TRACE_FRAME_MARK_NAMED("Iteration");
+    vm::ref<iree_vm_list_t> results;
+    IREE_CHECK_OK(iree_vm_list_create(nullptr, output_descs.size(),
+                                      iree_allocator_system(), &results));
+    IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr, inputs,
+                                 results.get(), iree_allocator_system()));
+  }
+}
+
 void RegisterModuleBenchmarks(
     const std::string& function_name, iree_vm_context_t* context,
     iree_vm_function_t function, iree_vm_list_t* inputs,
     const std::vector<RawSignatureParser::Description>& output_descs) {
   auto benchmark_name = "BM_" + function_name;
-  benchmark::RegisterBenchmark(
-      benchmark_name.c_str(),
-      [context, function, inputs,
-       output_descs](benchmark::State& state) -> void {
-        // Warmup run step.
-        {
-          vm::ref<iree_vm_list_t> outputs;
-          IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr,
-                                            output_descs.size(),
-                                            iree_allocator_system(), &outputs));
-          IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr,
-                                       inputs, outputs.get(),
-                                       iree_allocator_system()));
-        }
-        // Benchmarking loop.
-        for (auto _ : state) {
-          // No status conversions and conditional returns in the benchmarked
-          // inner loop.
-          vm::ref<iree_vm_list_t> outputs;
-          IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr,
-                                            output_descs.size(),
-                                            iree_allocator_system(), &outputs));
-          IREE_CHECK_OK(iree_vm_invoke(context, function, /*policy=*/nullptr,
-                                       inputs, outputs.get(),
-                                       iree_allocator_system()));
-        }
-      })
+  benchmark::RegisterBenchmark(benchmark_name.c_str(),
+                               [benchmark_name, context, function, inputs,
+                                output_descs](benchmark::State& state) -> void {
+                                 BenchmarkFunction(benchmark_name, context,
+                                                   function, inputs,
+                                                   output_descs, state);
+                               })
       // By default only the main thread is included in CPU time. Include all
       // the threads instead.
       ->MeasureProcessCPUTime()
@@ -107,6 +108,7 @@
 }
 
 Status GetModuleContentsFromFlags(std::string& module_data) {
+  IREE_TRACE_SCOPE0("GetModuleContentsFromFlags");
   auto module_file = absl::GetFlag(FLAGS_module_file);
   IREE_ASSIGN_OR_RETURN(module_data, file_io::GetFileContents(module_file));
   return iree::OkStatus();
@@ -127,6 +129,8 @@
         context_(nullptr),
         input_module_(nullptr){};
   ~IREEBenchmark() {
+    IREE_TRACE_SCOPE0("IREEBenchmark::dtor");
+
     // Order matters.
     inputs_.reset();
     iree_vm_module_release(hal_module_);
@@ -137,6 +141,8 @@
   };
 
   Status Register() {
+    IREE_TRACE_SCOPE0("IREEBenchmark::Register");
+
     if (!instance_ || !device_ || !hal_module_ || !context_ || !input_module_) {
       IREE_RETURN_IF_ERROR(Init());
     }
@@ -152,6 +158,9 @@
 
  private:
   Status Init() {
+    IREE_TRACE_SCOPE0("IREEBenchmark::Init");
+    IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init");
+
     IREE_RETURN_IF_ERROR(GetModuleContentsFromFlags(module_data_));
 
     IREE_RETURN_IF_ERROR(iree_hal_module_register_types());
@@ -170,10 +179,14 @@
     IREE_RETURN_IF_ERROR(iree_vm_context_create_with_modules(
         instance_, modules.data(), modules.size(), iree_allocator_system(),
         &context_));
+
+    IREE_TRACE_FRAME_MARK_END_NAMED("init");
     return iree::OkStatus();
   }
 
   Status RegisterSpecificFunction(const std::string& function_name) {
+    IREE_TRACE_SCOPE0("IREEBenchmark::RegisterSpecificFunction");
+
     iree_vm_function_t function;
     IREE_RETURN_IF_ERROR(input_module_->lookup_function(
         input_module_->self, IREE_VM_FUNCTION_LINKAGE_EXPORT,
@@ -203,6 +216,7 @@
   }
 
   Status RegisterAllExportedFunctions() {
+    IREE_TRACE_SCOPE0("IREEBenchmark::RegisterAllExportedFunctions");
     iree_vm_function_t function;
     iree_vm_module_signature_t signature =
         input_module_->signature(input_module_->self);
@@ -239,6 +253,8 @@
 }  // namespace iree
 
 int main(int argc, char** argv) {
+  IREE_TRACE_SCOPE0("main");
+
   // We have to contend with two flag parsing libraries here: absl's and
   // benchmark's. To make matters worse, both define the `--help` flag. To
   // ensure that each is able to parse its own flags, we use an absl "internal"
diff --git a/iree/vm/BUILD b/iree/vm/BUILD
index 07f1324..3f3d93e 100644
--- a/iree/vm/BUILD
+++ b/iree/vm/BUILD
@@ -170,6 +170,7 @@
         ":builtin_types",
         "//iree/base:api",
         "//iree/base:atomics",
+        "//iree/base:tracing",
     ],
 )
 
@@ -221,6 +222,7 @@
         "//iree/base:alignment",
         "//iree/base:api",
         "//iree/base:atomics",
+        "//iree/base:tracing",
     ],
 )
 
diff --git a/iree/vm/CMakeLists.txt b/iree/vm/CMakeLists.txt
index e17375e..48d6544 100644
--- a/iree/vm/CMakeLists.txt
+++ b/iree/vm/CMakeLists.txt
@@ -190,6 +190,7 @@
     ::builtin_types
     iree::base::api
     iree::base::atomics
+    iree::base::tracing
   PUBLIC
 )
 
@@ -251,6 +252,7 @@
     iree::base::alignment
     iree::base::api
     iree::base::atomics
+    iree::base::tracing
   PUBLIC
 )
 
diff --git a/iree/vm/bytecode_dispatch.c b/iree/vm/bytecode_dispatch.c
index 72ba1ab..bb5fbbf 100644
--- a/iree/vm/bytecode_dispatch.c
+++ b/iree/vm/bytecode_dispatch.c
@@ -14,6 +14,7 @@
 
 #include <string.h>
 
+#include "iree/base/tracing.h"
 #include "iree/vm/bytecode_dispatch_util.h"
 #include "iree/vm/list.h"
 
diff --git a/iree/vm/bytecode_module.c b/iree/vm/bytecode_module.c
index 8e564bd..caa89d3 100644
--- a/iree/vm/bytecode_module.c
+++ b/iree/vm/bytecode_module.c
@@ -16,6 +16,7 @@
 
 #include "iree/base/alignment.h"
 #include "iree/base/api.h"
+#include "iree/base/tracing.h"
 #include "iree/vm/bytecode_module_impl.h"
 #include "iree/vm/ref.h"
 #include "iree/vm/stack.h"
@@ -79,15 +80,18 @@
 // registered.
 static iree_status_t iree_vm_bytecode_module_resolve_types(
     iree_vm_TypeDef_vec_t type_defs, iree_vm_type_def_t* type_table) {
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 is ended on both exits below.
   for (size_t i = 0; i < iree_vm_TypeDef_vec_len(type_defs); ++i) {
     iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(type_defs, i);
     type_table[i] = iree_vm_bytecode_module_resolve_type(type_def);
     if (!iree_vm_type_def_is_valid(type_table[i])) {
+      IREE_TRACE_ZONE_END(z0);  // End z0 ahead of the NOT_FOUND return.
       return iree_make_status(IREE_STATUS_NOT_FOUND,
                               "no type registered with name '%s'",
                               iree_vm_TypeDef_full_name(type_def));
     }
   }
+  IREE_TRACE_ZONE_END(z0);  // All entries were valid: end z0, report OK.
   return iree_ok_status();
 }
 
@@ -249,6 +253,7 @@
 
 static void iree_vm_bytecode_module_destroy(void* self) {
   iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
+  IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_allocator_free(module->flatbuffer_allocator,
                       (void*)module->flatbuffer_data.data);
@@ -256,6 +261,8 @@
   module->flatbuffer_allocator = iree_allocator_null();
 
   iree_allocator_free(module->allocator, module);
+
+  IREE_TRACE_ZONE_END(z0);
 }
 
 static iree_string_view_t iree_vm_bytecode_module_name(void* self) {
@@ -544,6 +551,7 @@
 static iree_status_t iree_vm_bytecode_module_alloc_state(
     void* self, iree_allocator_t allocator,
     iree_vm_module_state_t** out_module_state) {
+  IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(out_module_state);
   *out_module_state = NULL;
 
@@ -556,8 +564,9 @@
 
   // Allocate the storage for the structure and all its nested tables.
   iree_vm_bytecode_module_state_t* state = NULL;
-  IREE_RETURN_IF_ERROR(iree_allocator_malloc(allocator, total_state_struct_size,
-                                             (void**)&state));
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, total_state_struct_size,
+                                (void**)&state));
   state->allocator = allocator;
 
   // Perform layout to get the pointers into the storage for each nested table.
@@ -577,12 +586,14 @@
   }
 
   *out_module_state = (iree_vm_module_state_t*)state;
+  IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
 }
 
 static void iree_vm_bytecode_module_free_state(
     void* self, iree_vm_module_state_t* module_state) {
   if (!module_state) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_vm_bytecode_module_state_t* state =
       (iree_vm_bytecode_module_state_t*)module_state;
@@ -593,6 +604,8 @@
   }
 
   iree_allocator_free(state->allocator, module_state);
+
+  IREE_TRACE_ZONE_END(z0);
 }
 
 static iree_status_t iree_vm_bytecode_module_resolve_import(
@@ -645,7 +658,7 @@
   // NOTE: any work here adds directly to the invocation time. Avoid doing too
   // much work or touching too many unlikely-to-be-cached structures (such as
   // walking the FlatBuffer, which may cause page faults).
-
+  IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(out_result);
   memset(out_result, 0, sizeof(iree_vm_execution_result_t));
 
@@ -653,12 +666,15 @@
   // allow exports here as well to make things easier to call externally.
   iree_vm_function_t function = call->function;
   if (function.linkage != IREE_VM_FUNCTION_LINKAGE_INTERNAL) {
-    IREE_RETURN_IF_ERROR(iree_vm_bytecode_module_get_function(
-        self, function.linkage, function.ordinal, &function, NULL, NULL));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_vm_bytecode_module_get_function(
+            self, function.linkage, function.ordinal, &function, NULL, NULL));
   }
 
   iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
   if (function.ordinal >= module->function_descriptor_count) {
+    IREE_TRACE_ZONE_END(z0);
     return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                             "function ordinal out of range (0 < %u < %zu)",
                             function.ordinal,
@@ -688,28 +704,40 @@
       flatbuffers_string_len(calling_convention);
   iree_string_view_t cconv_arguments = iree_string_view_empty();
   iree_string_view_t cconv_results = iree_string_view_empty();
-  IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
-      &signature, &cconv_arguments, &cconv_results));
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_function_call_get_cconv_fragments(
+              &signature, &cconv_arguments, &cconv_results));
 
   // Jump into the dispatch routine to execute bytecode until the function
   // either returns (synchronous) or yields (asynchronous).
-  return iree_vm_bytecode_dispatch(stack, module, call, cconv_arguments,
-                                   cconv_results, out_result);
+  iree_status_t status = iree_vm_bytecode_dispatch(
+      stack, module, call, cconv_arguments, cconv_results, out_result);
+  IREE_TRACE_ZONE_END(z0);
+  return status;
 }
 
 IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_bytecode_module_create(
     iree_const_byte_span_t flatbuffer_data,
     iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
     iree_vm_module_t** out_module) {
+  IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(out_module);
   *out_module = NULL;
 
-  IREE_RETURN_IF_ERROR(
-      iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data));
+  IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_module_flatbuffer_verify");
+  iree_status_t status =
+      iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data);
+  if (!iree_status_is_ok(status)) {
+    IREE_TRACE_ZONE_END(z1);
+    IREE_TRACE_ZONE_END(z0);
+    return status;
+  }
+  IREE_TRACE_ZONE_END(z1);
 
   iree_vm_BytecodeModuleDef_table_t module_def =
       iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
   if (!module_def) {
+    IREE_TRACE_ZONE_END(z0);
     return iree_make_status(
         IREE_STATUS_INVALID_ARGUMENT,
         "failed getting root from flatbuffer; expected identifier "
@@ -721,9 +749,10 @@
       iree_vm_TypeDef_vec_len(type_defs) * sizeof(iree_vm_type_def_t);
 
   iree_vm_bytecode_module_t* module = NULL;
-  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
-      allocator, sizeof(iree_vm_bytecode_module_t) + type_table_size,
-      (void**)&module));
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(
+              allocator, sizeof(iree_vm_bytecode_module_t) + type_table_size,
+              (void**)&module));
   module->allocator = allocator;
 
   iree_vm_FunctionDescriptor_vec_t function_descriptors =
@@ -748,6 +777,7 @@
       iree_vm_bytecode_module_resolve_types(type_defs, module->type_table);
   if (!iree_status_is_ok(resolve_status)) {
     iree_allocator_free(allocator, module);
+    IREE_TRACE_ZONE_END(z0);
     return resolve_status;
   }
 
@@ -765,5 +795,6 @@
       iree_vm_bytecode_module_get_function_reflection_attr;
 
   *out_module = &module->interface;
+  IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
 }
diff --git a/iree/vm/context.c b/iree/vm/context.c
index e0d2b8b..e211adc 100644
--- a/iree/vm/context.c
+++ b/iree/vm/context.c
@@ -89,6 +89,8 @@
 static iree_status_t iree_vm_context_resolve_module_imports(
     iree_vm_context_t* context, iree_vm_module_t* module,
     iree_vm_module_state_t* module_state) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
   // NOTE: this has some bad characteristics, but the number of modules and the
   // number of imported functions should be relatively small (even if the number
   // of exported functions for particular modules is large).
@@ -96,7 +98,8 @@
   for (int i = 0; i < module_signature.import_function_count; ++i) {
     iree_string_view_t full_name;
     iree_vm_function_signature_t expected_signature;
-    IREE_RETURN_IF_ERROR(
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
         module->get_function(module->self, IREE_VM_FUNCTION_LINKAGE_IMPORT, i,
                              /*out_function=*/NULL,
                              /*out_name=*/&full_name,
@@ -105,7 +108,8 @@
     // Resolve the function to the module that contains it and return the
     // information.
     iree_vm_function_t import_function;
-    IREE_RETURN_IF_ERROR(
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
         iree_vm_context_resolve_function(context, full_name, &import_function));
 
     // Query the function signature from the module that contains it; we don't
@@ -127,6 +131,7 @@
     if (expected_signature.calling_convention.size &&
         !iree_string_view_equal(import_signature.calling_convention,
                                 expected_signature.calling_convention)) {
+      IREE_TRACE_ZONE_END(z0);
       return iree_make_status(
           IREE_STATUS_INTERNAL,
           "import function signature mismatch between %.*s "
@@ -141,9 +146,12 @@
           import_signature.calling_convention.data);
     }
 
-    IREE_RETURN_IF_ERROR(module->resolve_import(
-        module->self, module_state, i, &import_function, &import_signature));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, module->resolve_import(module->self, module_state, i,
+                                   &import_function, &import_signature));
   }
+
+  IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
 }
 
@@ -199,6 +207,7 @@
     iree_vm_instance_t* instance, iree_vm_module_t** modules,
     iree_host_size_t module_count, iree_allocator_t allocator,
     iree_vm_context_t** out_context) {
+  IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(instance);
   IREE_ASSERT_ARGUMENT(out_context);
   *out_context = NULL;
@@ -231,10 +240,12 @@
       iree_vm_context_register_modules(context, modules, module_count);
   if (!iree_status_is_ok(register_status)) {
     iree_vm_context_destroy(context);
+    IREE_TRACE_ZONE_END(z0);
     return register_status;
   }
 
   *out_context = context;
+  IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
 }
 
@@ -317,9 +328,12 @@
     }
   }
 
+  IREE_TRACE_ZONE_BEGIN(z0);
+
   // Try growing both our storage lists first, if needed.
   if (context->list.count + module_count > context->list.capacity) {
     if (context->is_static) {
+      IREE_TRACE_ZONE_END(z0);
       return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                               "context was allocated as static and cannot "
                               "register modules after creation");
@@ -330,13 +344,16 @@
       new_capacity = context->list.capacity * 2;
     }
     iree_vm_module_t** new_module_list;
-    IREE_RETURN_IF_ERROR(iree_allocator_malloc(
-        context->allocator, sizeof(iree_vm_module_t*) * new_capacity,
-        (void**)&new_module_list));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_allocator_malloc(context->allocator,
+                                  sizeof(iree_vm_module_t*) * new_capacity,
+                                  (void**)&new_module_list));
     iree_vm_module_state_t** new_module_state_list;
-    IREE_RETURN_IF_ERROR(iree_allocator_malloc(
-        context->allocator, sizeof(iree_vm_module_state_t*) * new_capacity,
-        (void**)&new_module_state_list));
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0,
+        iree_allocator_malloc(context->allocator,
+                              sizeof(iree_vm_module_state_t*) * new_capacity,
+                              (void**)&new_module_state_list));
     memcpy(new_module_list, context->list.modules,
            sizeof(iree_vm_module_t*) * context->list.count);
     memcpy(new_module_state_list, context->list.module_states,
@@ -409,12 +426,14 @@
     context->list.count = original_count;
   }
 
+  IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
 IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_context_resolve_function(
     const iree_vm_context_t* context, iree_string_view_t full_name,
     iree_vm_function_t* out_function) {
+  IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(out_function);
   memset(out_function, 0, sizeof(iree_vm_function_t));
 
@@ -422,6 +441,7 @@
   iree_string_view_t function_name;
   if (iree_string_view_split(full_name, '.', &module_name, &function_name) ==
       -1) {
+    IREE_TRACE_ZONE_END(z0);
     return iree_make_status(
         IREE_STATUS_INVALID_ARGUMENT,
         "import name not fully-qualified (module.func): '%.*s'",
@@ -430,13 +450,15 @@
 
   for (int i = (int)context->list.count - 1; i >= 0; --i) {
     iree_vm_module_t* module = context->list.modules[i];
-    if (iree_string_view_compare(module_name, iree_vm_module_name(module)) ==
-        0) {
-      return iree_vm_module_lookup_function_by_name(
+    if (iree_string_view_equal(module_name, iree_vm_module_name(module))) {
+      iree_status_t status = iree_vm_module_lookup_function_by_name(
           module, IREE_VM_FUNCTION_LINKAGE_EXPORT, function_name, out_function);
+      IREE_TRACE_ZONE_END(z0);
+      return status;
     }
   }
 
+  IREE_TRACE_ZONE_END(z0);
   return iree_make_status(IREE_STATUS_NOT_FOUND,
                           "module '%.*s' required for import '%.*s' not "
                           "registered with the context",
diff --git a/iree/vm/instance.c b/iree/vm/instance.c
index 5cc92d8..1ca17b4 100644
--- a/iree/vm/instance.c
+++ b/iree/vm/instance.c
@@ -15,6 +15,7 @@
 #include "iree/vm/instance.h"
 
 #include "iree/base/atomics.h"
+#include "iree/base/tracing.h"
 #include "iree/vm/builtin_types.h"
 
 struct iree_vm_instance {
@@ -24,24 +25,29 @@
 
 IREE_API_EXPORT iree_status_t IREE_API_CALL iree_vm_instance_create(
     iree_allocator_t allocator, iree_vm_instance_t** out_instance) {
+  IREE_TRACE_ZONE_BEGIN(z0);  // Each exit path below ends z0.
   IREE_ASSERT_ARGUMENT(out_instance);
   *out_instance = NULL;
 
-  IREE_RETURN_IF_ERROR(iree_vm_register_builtin_types());
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, iree_vm_register_builtin_types());
 
   iree_vm_instance_t* instance = NULL;
-  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
-      allocator, sizeof(iree_vm_instance_t), (void**)&instance));
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(allocator, sizeof(iree_vm_instance_t),
+                                (void**)&instance));
   instance->allocator = allocator;
   iree_atomic_ref_count_init(&instance->ref_count);
 
   *out_instance = instance;
+  IREE_TRACE_ZONE_END(z0);  // Success path: end z0 before returning.
   return iree_ok_status();
 }
 
 static void iree_vm_instance_destroy(iree_vm_instance_t* instance) {
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 covers the argument check and the free.
   IREE_ASSERT_ARGUMENT(instance);
   iree_allocator_free(instance->allocator, instance);
+  IREE_TRACE_ZONE_END(z0);  // |instance| has already been freed here.
 }
 
 IREE_API_EXPORT void IREE_API_CALL
diff --git a/iree/vm/module.c b/iree/vm/module.c
index 934896f..54e602f 100644
--- a/iree/vm/module.c
+++ b/iree/vm/module.c
@@ -17,6 +17,7 @@
 #include <string.h>
 
 #include "iree/base/atomics.h"
+#include "iree/base/tracing.h"
 #include "iree/vm/ref.h"
 
 IREE_API_EXPORT iree_status_t IREE_API_CALL
@@ -138,9 +139,11 @@
 
 IREE_API_EXPORT iree_status_t IREE_API_CALL
 iree_vm_module_initialize(iree_vm_module_t* module, void* self) {
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 wraps the zero-fill and field setup.
   memset(module, 0, sizeof(iree_vm_module_t));
   module->self = self;
   iree_atomic_ref_count_init(&module->ref_count);
+  IREE_TRACE_ZONE_END(z0);  // Single exit: end z0, then return OK.
   return iree_ok_status();
 }
 
@@ -225,8 +228,10 @@
 IREE_API_EXPORT iree_string_view_t IREE_API_CALL
 iree_vm_function_reflection_attr(const iree_vm_function_t* function,
                                  iree_string_view_t key) {
+  IREE_TRACE_ZONE_BEGIN(z0);
   iree_vm_module_t* module = function->module;
   if (!module->get_function_reflection_attr) {
+    IREE_TRACE_ZONE_END(z0);
     return iree_string_view_empty();
   }
   for (int index = 0;; ++index) {
@@ -239,9 +244,11 @@
       break;
     }
     if (iree_string_view_compare(key, index_key) == 0) {
+      IREE_TRACE_ZONE_END(z0);
       return index_value;
     }
   }
+  IREE_TRACE_ZONE_END(z0);
   return iree_string_view_empty();
 }
 
diff --git a/iree/vm/stack.c b/iree/vm/stack.c
index 6fe9e36..8acdd88 100644
--- a/iree/vm/stack.c
+++ b/iree/vm/stack.c
@@ -419,6 +419,15 @@
   stack->frame_storage_size = new_top;
   stack->top = frame_header;
 
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  // TODO(benvanik): cache source location and query from module.
+  iree_string_view_t function_name = iree_vm_function_name(function);
+  IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, function_name.data,
+                                      function_name.size);
+  callee_frame->trace_zone = z0;
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, frame_size);
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
   if (out_callee_frame) *out_callee_frame = callee_frame;
   return iree_ok_status();
 }
@@ -435,6 +444,8 @@
     stack->top->frame_cleanup_fn(&stack->top->frame);
   }
 
+  IREE_TRACE_ZONE_END(stack->top->frame.trace_zone);
+
   // Restore the frame pointer to the caller.
   stack->frame_storage_size -= stack->top->frame_size;
   stack->top = stack->top->parent;
diff --git a/iree/vm/stack.h b/iree/vm/stack.h
index 3f1c580..98fe693 100644
--- a/iree/vm/stack.h
+++ b/iree/vm/stack.h
@@ -20,6 +20,7 @@
 
 #include "iree/base/alignment.h"
 #include "iree/base/api.h"
+#include "iree/base/tracing.h"
 #include "iree/vm/module.h"
 #include "iree/vm/ref.h"
 
@@ -85,6 +86,10 @@
   // offset (such as in the case of VM bytecode), a block identifier (compiled
   // code), etc.
   iree_vm_source_offset_t pc;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  iree_zone_id_t trace_zone;
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
 } iree_vm_stack_frame_t;
 
 // Returns the implementation-defined frame storage associated with |frame|.