Test 'console' provider in 'tracing' job. (#16454)

This also refactors the job to use a GitHub-hosted runner with less
ad hoc GitHub Actions boilerplate (caching, env vars, etc.), since the
'tracing' job has only been building the runtime since
https://github.com/openxla/iree/pull/6709 (2021).

Fixes https://github.com/openxla/iree/issues/16405

---------

Signed-off-by: Scott Todd <scott.todd0@gmail.com>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ae5b8b1..cd978e8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -738,30 +738,27 @@
   tracing:
     needs: setup
     if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'tracing')
-    runs-on:
-      - self-hosted # must come first
-      - runner-group=${{ needs.setup.outputs.runner-group }}
-      - environment=${{ needs.setup.outputs.runner-env }}
-      - cpu
-      - os-family=Linux
+    runs-on: ubuntu-20.04
     env:
       BUILD_DIR: build-tracing
     steps:
       - name: "Checking out repository"
         uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
-        with:
-          submodules: true
-      - name: "Building IREE with tracing enabled"
-        env:
-          IREE_WRITE_REMOTE_CCACHE: ${{ needs.setup.outputs.write-caches }}
+      - name: "Checking out runtime submodules"
+        run: ./build_tools/scripts/git/update_runtime_submodules.sh
+      - name: "Building IREE runtime with tracing - Tracy"
         run: |
-          # TODO(#11394): Enable Web GPU
           ./build_tools/github_actions/docker_run.sh \
-            --env "IREE_CCACHE_GCP_TOKEN=$(gcloud auth application-default print-access-token)" \
-            --env "IREE_WRITE_REMOTE_CCACHE=${IREE_WRITE_REMOTE_CCACHE}" \
-            --env "CCACHE_NAMESPACE=gcr.io/iree-oss/base@sha256:dc314b4fe30fc1315742512891357bffed4d1b62ffcb46258b1e0761c737b446" \
+            --env "TRACING_PROVIDER=tracy" \
             gcr.io/iree-oss/base@sha256:dc314b4fe30fc1315742512891357bffed4d1b62ffcb46258b1e0761c737b446 \
-            ./build_tools/cmake/build_tracing.sh \
+            ./build_tools/cmake/build_runtime_tracing.sh \
+            "${BUILD_DIR}"
+      - name: "Building IREE runtime with tracing - console"
+        run: |
+          ./build_tools/github_actions/docker_run.sh \
+            --env "TRACING_PROVIDER=console" \
+            gcr.io/iree-oss/base@sha256:dc314b4fe30fc1315742512891357bffed4d1b62ffcb46258b1e0761c737b446 \
+            ./build_tools/cmake/build_runtime_tracing.sh \
             "${BUILD_DIR}"
 
   debug:
diff --git a/build_tools/cmake/build_tracing.sh b/build_tools/cmake/build_runtime_tracing.sh
similarity index 70%
rename from build_tools/cmake/build_tracing.sh
rename to build_tools/cmake/build_runtime_tracing.sh
index 2d39092..c52b21f 100755
--- a/build_tools/cmake/build_tracing.sh
+++ b/build_tools/cmake/build_runtime_tracing.sh
@@ -9,26 +9,21 @@
 # manually. This uses previously cached build results and does not clear build
 # directories.
 
-set -e
-set -x
+set -xeuo pipefail
 
 BUILD_DIR="${1:-${IREE_TRACING_BUILD_DIR:-build-tracing}}"
+TRACING_PROVIDER="${TRACING_PROVIDER:-tracy}"
 
 source build_tools/cmake/setup_build.sh
-source build_tools/cmake/setup_ccache.sh
+# Note: not using ccache since the runtime build should be fast already.
 
-# Note: https://github.com/iree-org/iree/issues/6404 prevents us from building
-# tests with these other settings. Many tests invoke the compiler tools with
-# MLIR threading enabled, which crashes with compiler tracing enabled.
 "${CMAKE_BIN?}" -B "${BUILD_DIR}" \
   -G Ninja . \
   -DPython3_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \
   -DPYTHON_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE}" \
   -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+  -DIREE_ENABLE_LLD=ON \
   -DIREE_ENABLE_RUNTIME_TRACING=ON \
+  -DIREE_TRACING_PROVIDER="${TRACING_PROVIDER}" \
   -DIREE_BUILD_COMPILER=OFF
 "${CMAKE_BIN?}" --build "${BUILD_DIR}" -- -k 0
-
-if (( IREE_USE_CCACHE == 1 )); then
-  ccache --show-stats
-fi
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
index 4cd16ab..ea5cb95 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
@@ -560,7 +560,7 @@
                           IREE::VM::TargetOptions vmOptions,
                           IREE::VM::BytecodeTargetOptions bytecodeOptions,
                           llvm::raw_ostream &output) {
-  IREE_TRACE_SCOPE();
+  IREE_COMPILER_TRACE_SCOPE();
   moduleOp.getContext()->getOrLoadDialect<IREE::Util::UtilDialect>();
 
   if (failed(canonicalizeModule(bytecodeOptions, moduleOp))) {
diff --git a/compiler/src/iree/compiler/Utils/TracingUtils.h b/compiler/src/iree/compiler/Utils/TracingUtils.h
index e34be1a..839dbd9 100644
--- a/compiler/src/iree/compiler/Utils/TracingUtils.h
+++ b/compiler/src/iree/compiler/Utils/TracingUtils.h
@@ -41,6 +41,9 @@
   IREE_TRACING_COMPILER_MESSAGE_LEVEL_DEBUG = 0x00FF00u,
 };
 
+// Fork of IREE_TRACE_SCOPE.
+#define IREE_COMPILER_TRACE_SCOPE() ZoneScoped
+
 // Fork of IREE_TRACE_MESSAGE_DYNAMIC, taking std::string (or llvm::StringRef).
 #define IREE_COMPILER_TRACE_MESSAGE_DYNAMIC(level, value_string)               \
   ___tracy_emit_messageC(value_string.data(), value_string.size(),             \
@@ -66,6 +69,7 @@
 createTraceFrameMarkEndPass(llvm::StringRef name = "");
 
 #else
+#define IREE_COMPILER_TRACE_SCOPE()
 #define IREE_COMPILER_TRACE_MESSAGE_DYNAMIC(level, value_string)
 #define IREE_TRACE_ADD_BEGIN_FRAME_PASS(passManager, frameName)
 #define IREE_TRACE_ADD_END_FRAME_PASS(passManager, frameName)
diff --git a/runtime/src/iree/hal/drivers/cuda/nccl_channel.c b/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
index 4b15ba3..5d10ee7 100644
--- a/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
+++ b/runtime/src/iree/hal/drivers/cuda/nccl_channel.c
@@ -562,7 +562,7 @@
         __FUNCTION__, strlen(__FUNCTION__), collective_str.data,
         collective_str.size);
   }
-#endif  // IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
 
   // Issue all collective operations in the batch as part of a group.
   // NCCL may be able to fuse or reduce overheads by issuing like this.
@@ -573,13 +573,13 @@
   }
   IREE_NCCL_RETURN_IF_ERROR(symbols, ncclGroupEnd(), "ncclGroupEnd");
 
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
   // End all zones we began above - note that these are just simply nested so
   // order doesn't matter so long as we end the right number of zones.
-  IREE_TRACE({
-    for (iree_host_size_t i = 0; i < batch->count; ++i) {
-      IREE_CUDA_STREAM_TRACE_ZONE_END(tracing_context, stream);
-    }
-  });
+  for (iree_host_size_t i = 0; i < batch->count; ++i) {
+    IREE_CUDA_STREAM_TRACE_ZONE_END(tracing_context, stream);
+  }
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
 
   return iree_ok_status();
 }
diff --git a/runtime/src/iree/hal/drivers/cuda/tracing.c b/runtime/src/iree/hal/drivers/cuda/tracing.c
index b509557..02d4b20 100644
--- a/runtime/src/iree/hal/drivers/cuda/tracing.c
+++ b/runtime/src/iree/hal/drivers/cuda/tracing.c
@@ -340,4 +340,4 @@
 void iree_hal_cuda_tracing_context_collect(
     iree_hal_cuda_tracing_context_t* context) {}
 
-#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
diff --git a/runtime/src/iree/hal/drivers/cuda/tracing.h b/runtime/src/iree/hal/drivers/cuda/tracing.h
index c4cd69c..65630dd 100644
--- a/runtime/src/iree/hal/drivers/cuda/tracing.h
+++ b/runtime/src/iree/hal/drivers/cuda/tracing.h
@@ -137,7 +137,7 @@
     name, name_length)
 #define IREE_CUDA_STREAM_TRACE_ZONE_END(context, stream)
 
-#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION_DEVICE
 
 #ifdef __cplusplus
 }  // extern "C"