Set CPUs to max frequency before running benchmarks (#7728)

The aim is to reduce variance in benchmarks. Note that this also
reduces our ability to capture important real-world scenarios, like
threads running on cores with different clock speeds. We are
deliberately making this tradeoff for now to get variance under control,
and we may revisit it in the future. Pinning to max clock speed is safe
and consistent only because the benchmark phones are on cooling plates,
so thermal throttling won't be a problem.

Only the Pixel phone is pinned, because pinning requires root and we
don't have the Samsung phones rooted (Samsung makes rooting painful, and
doing so caused the phone to reboot randomly all the time).
diff --git a/build_tools/benchmarks/run_benchmarks_on_android.py b/build_tools/benchmarks/run_benchmarks_on_android.py
index fde034e..5bdeb98 100755
--- a/build_tools/benchmarks/run_benchmarks_on_android.py
+++ b/build_tools/benchmarks/run_benchmarks_on_android.py
@@ -108,7 +108,7 @@
 
 
 def adb_push_to_tmp_dir(content: str,
-                        relative_dir: str,
+                        relative_dir: str = "",
                         verbose: bool = False) -> str:
   """Pushes content onto the Android device.
 
@@ -127,11 +127,13 @@
   return android_path
 
 
-def adb_execute_in_dir(cmd_args: Sequence[str],
-                       relative_dir: str,
-                       verbose: bool = False) -> str:
-  """Executes command with adb shell in a directory, waits for completion,
-  and returns the output.
+def adb_execute_and_get_output(cmd_args: Sequence[str],
+                               relative_dir: str = "",
+                               verbose: bool = False) -> str:
+  """Executes command with adb shell.
+
+  Switches to `relative_dir` relative to the android tmp directory before
+  executing. Waits for completion and returns the command stdout.
 
   Args:
     cmd_args: a list containing the command to execute and its parameters
@@ -142,16 +144,40 @@
     A string for the command output.
   """
   cmd = ["adb", "shell"]
-  cmd.extend(["cd", f"{ANDROID_TMP_DIR}/{relative_dir}"])
+  cmd.extend(["cd", os.path.join(ANDROID_TMP_DIR, relative_dir)])
   cmd.append("&&")
   cmd.extend(cmd_args)
 
   return execute_cmd_and_get_output(cmd, verbose=verbose)
 
 
-def adb_start_in_dir(cmd_args: Sequence[str],
-                     relative_dir: str,
-                     verbose: bool = False) -> subprocess.Popen:
+def adb_execute(cmd_args: Sequence[str],
+                relative_dir: str = "",
+                verbose: bool = False) -> subprocess.CompletedProcess:
+  """Executes command with adb shell.
+
+  Switches to `relative_dir` relative to the android tmp directory before
+  executing. Waits for completion. Output is streamed to the terminal.
+
+  Args:
+    cmd_args: a list containing the command to execute and its parameters
+    relative_dir: the directory to execute the command in; relative to
+      ANDROID_TMP_DIR.
+
+  Returns:
+    The completed process.
+  """
+  cmd = ["adb", "shell"]
+  cmd.extend(["cd", os.path.join(ANDROID_TMP_DIR, relative_dir)])
+  cmd.append("&&")
+  cmd.extend(cmd_args)
+
+  return execute_cmd(cmd, verbose=verbose)
+
+
+def adb_start_cmd(cmd_args: Sequence[str],
+                  relative_dir: str,
+                  verbose: bool = False) -> subprocess.Popen:
   """Executes command with adb shell in a directory and returns the handle
   without waiting for completion.
 
@@ -372,9 +398,9 @@
             "--benchmark_out_format=json",
             f"--benchmark_out='{benchmark_results_basename}'",
         ]
-        result_json = adb_execute_in_dir(cmd,
-                                         android_relative_dir,
-                                         verbose=verbose)
+        result_json = adb_execute_and_get_output(cmd,
+                                                 android_relative_dir,
+                                                 verbose=verbose)
 
         # Pull the result file back onto the host and set the filename for later
         # return.
@@ -400,9 +426,7 @@
 
         # Just launch the traced benchmark tool with TRACY_NO_EXIT=1 without
         # waiting for the adb command to complete as that won't happen.
-        process = adb_start_in_dir(run_cmd,
-                                   android_relative_dir,
-                                   verbose=verbose)
+        process = adb_start_cmd(run_cmd, android_relative_dir, verbose=verbose)
         # But we do need to wait for its start; otherwise will see connection
         # failure when opening the catpure tool. Here we cannot just sleep a
         # certain amount of seconds---Pixel 4 seems to have an issue that will
@@ -523,6 +547,14 @@
   return (benchmark_files, captures, errors)
 
 
+def set_frequency_scaling_governor(governor: str):
+  git_root = execute_cmd_and_get_output(["git", "rev-parse", "--show-toplevel"])
+  cpu_script = os.path.join(
+      git_root, "build_tools/benchmarks/set_android_scaling_governor.sh")
+  adb_push_to_tmp_dir(cpu_script)
+  adb_execute(["su", "root", "./set_android_scaling_governor.sh", governor])
+
+
 def parse_arguments():
   """Parses command-line options."""
 
@@ -581,6 +613,11 @@
                       action="store_true",
                       help="Print internal information during execution")
   parser.add_argument(
+      "--pin-cpu-freq",
+      "--pin_cpu_freq",
+      action="store_true",
+      help="Pin CPU frequency for all cores to the maximum. Requires root")
+  parser.add_argument(
       "--keep_going",
       "--keep-going",
       action="store_true",
@@ -620,6 +657,10 @@
     raise ValueError(f"Unrecognized GPU name: '{device_info.gpu_name}'; "
                      "need to update the map")
 
+  if args.pin_cpu_freq:
+    set_frequency_scaling_governor("performance")
+    atexit.register(set_frequency_scaling_governor, "schedutil")
+
   previous_benchmarks = None
   previous_captures = None
 
diff --git a/build_tools/benchmarks/set_android_scaling_governor.sh b/build_tools/benchmarks/set_android_scaling_governor.sh
new file mode 100755
index 0000000..4f73f87
--- /dev/null
+++ b/build_tools/benchmarks/set_android_scaling_governor.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Runs on the Android device itself to set the frequency scaling governor for
+# all CPUs, taken from the first argument (default: performance).
+
+################################### WARNING ####################################
+# This will overheat the phone if it's not on a cooling plate, resulting in    #
+# thermal throttling. To prevent anything catching on fire, the actual CPU     #
+# frequencies will be throttled to below the maximum, skewing your results.    #
+################################################################################
+
+set -euo pipefail
+
+GOVERNOR="${1:-performance}"
+
+echo "CPU info (before changing governor):"
+echo 'cpu\tgovernor\tcur\tmin\tmax'
+echo "------------------------------------------------"
+for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \
+    echo "cpu${i}" | paste \
+      - \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \
+done
+
+echo "Setting CPU frequency governor to ${GOVERNOR}"
+
+for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \
+  echo "${GOVERNOR}" > \
+    "/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor"; \
+done
+
+echo "CPU info (after changing governor):"
+echo 'cpu\tgovernor\tcur\tmin\tmax'
+for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \
+    echo "cpu${i}" | paste \
+      - \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \
+      "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \
+done
diff --git a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
index cb0315b..fc2bc27 100644
--- a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
+++ b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
@@ -32,7 +32,7 @@
       - "tar -xzvf benchmark-suites-${BUILDKITE_BUILD_NUMBER}.tgz"
       - "tar -xzvf iree-android-tools-${BUILDKITE_BUILD_NUMBER}.tgz"
       - "tar -xzvf tracy-capture-058e8901.tgz"
-      - "python3 build_tools/benchmarks/run_benchmarks_on_android.py --normal_benchmark_tool=build-android/iree/tools/iree-benchmark-module --traced_benchmark_tool=build-android-trace/iree/tools/iree-benchmark-module --trace_capture_tool=tracy-capture -o benchmark-results-pixel-4-${BUILDKITE_BUILD_NUMBER}.json --capture_tarball=trace-captures-pixel-4-${BUILDKITE_BUILD_NUMBER}.tgz --verbose build-host/"
+      - "python3 build_tools/benchmarks/run_benchmarks_on_android.py --pin-cpu-freq --normal_benchmark_tool=build-android/iree/tools/iree-benchmark-module --traced_benchmark_tool=build-android-trace/iree/tools/iree-benchmark-module --trace_capture_tool=tracy-capture -o benchmark-results-pixel-4-${BUILDKITE_BUILD_NUMBER}.json --capture_tarball=trace-captures-pixel-4-${BUILDKITE_BUILD_NUMBER}.tgz --verbose build-host/"
     if: "build.pull_request.id == null || (build.pull_request.labels includes 'buildkite:benchmark')"
     agents:
       - "android-soc=snapdragon-855"