Geoffrey Martin-Noble | 552d3f8 | 2021-05-25 17:56:09 -0700 | [diff] [blame] | 1 | // Copyright 2020 The IREE Authors |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 2 | // |
Geoffrey Martin-Noble | 552d3f8 | 2021-05-25 17:56:09 -0700 | [diff] [blame] | 3 | // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 6 | |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 7 | //===----------------------------------------------------------------------===// |
| 8 | // iree-benchmark-module: benchmarks public functions in an IREE VM module |
| 9 | //===----------------------------------------------------------------------===// |
| 10 | // |
| 11 | // This runs exported functions using flags specified on the command line. |
| 12 | // Each function is measured independently and the numbers reported will be for |
| 13 | // the full end-to-end CPU and wall times. |
| 14 | // |
| 15 | // From an ML perspective this is an integration benchmark for measuring total |
| 16 | // user-visible latency of model entry points. It is *not* a microbenchmarking |
| 17 | // tool for individual device-side dispatch functions (aka ops aka kernels). |
| 18 | // If interested in the precise time of a particular dispatch then tracy, |
| 19 | // executable_library_benchmark, and platform/vendor tooling (nsight, perf, etc) |
| 20 | // are to be used instead and attaching them to this tool is often useful in |
| 21 | // order to get a large sample set. |
| 22 | // |
| 23 | // By default all functions taking no inputs will be benchmarked. If a function |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 24 | // takes inputs then the user will need to specify them using --input= |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 25 | // flags. Depending on the input program the -iree-flow-export-benchmark-funcs |
| 26 | // flag can be passed to the compiler to attempt to wrap each function with |
| 27 | // dummy inputs however this will fail in programs with dynamically shaped |
| 28 | // inputs. The workaround for avoiding the need for flags is to provide the |
| 29 | // input program in a form with no inputs from the start. |
| 30 | // |
| 31 | // It's important to remember that IREE is not a BLAS library and is meant to |
| 32 | // run entire programs. It's not generally appropriate to benchmark a model with |
| 33 | // a single matmul, for example, as that's just treating IREE as a BLAS library. |
| 34 | // Note also that user-level ops in a frontend environment don't map to the |
| 35 | // dispatches that IREE executes: IREE is a compiler like any other and does not |
| 36 | // guarantee a source line of code translates into an atomically divisible and |
| 37 | // independently measurable execution command. In other words don't expect to be |
| 38 | // able to benchmark the cost of a broadcasting elementwise tf.add op within a |
| 39 | // model: by the time we are running the program that's fused itself into a |
| 40 | // single machine instruction operating as part of some other ops. |
| 41 | // |
| 42 | // For coarse dispatch testing and triaging it can still be useful to remove |
| 43 | // some of the overheads introduced by whole-program execution and the compiler |
Scott Todd | 52f62b8 | 2022-05-10 17:51:34 -0700 | [diff] [blame] | 44 | // flag --iree-hal-benchmark-dispatch-repeat-count=N is provided to enable |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 45 | // batching. Whatever N is chosen must then be passed to this tool via |
| 46 | // --batch_size=N so that the benchmark reporting properly reflects the |
Scott Todd | 52f62b8 | 2022-05-10 17:51:34 -0700 | [diff] [blame] | 47 | // batching. As an example --iree-hal-benchmark-dispatch-repeat-count=32 + |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 48 | // --batch_size=32 will reduce the overheads by 32x. Think of this as a way to |
| 49 | // control the p value in Amdahl's law representing the amount of time spent in |
| 50 | // dispatches relative to the rest of the program. This isn't representative of |
| 51 | // how the full program will run, though, and YMMV. Always verify timings with |
| 52 | // an appropriate device-specific tool before trusting the more generic and |
| 53 | // higher-level numbers from this tool. |
| 54 | |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 55 | #include <array> |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 56 | #include <cstdio> |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 57 | #include <iterator> |
| 58 | #include <string> |
| 59 | #include <type_traits> |
| 60 | #include <utility> |
| 61 | #include <vector> |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 62 | |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 63 | #include "benchmark/benchmark.h" |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 64 | #include "iree/base/api.h" |
Ben Vanik | e28d253 | 2021-02-03 13:44:24 -0800 | [diff] [blame] | 65 | #include "iree/base/internal/flags.h" |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 66 | #include "iree/hal/api.h" |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 67 | #include "iree/modules/hal/types.h" |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 68 | #include "iree/tooling/context_util.h" |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 69 | #include "iree/tooling/device_util.h" |
Ben Vanik | 30901f5 | 2024-02-08 11:23:21 -0800 | [diff] [blame] | 70 | #include "iree/tooling/function_io.h" |
Ben Vanik | e8a9ec4 | 2020-07-16 22:04:57 -0700 | [diff] [blame] | 71 | #include "iree/vm/api.h" |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 72 | |
// Spellings of the time units understood by the --time_unit flag. The parser
// compares user input against these and the printer emits them back out.
constexpr char kMillisecondsUnitString[] = "ms";
constexpr char kMicrosecondsUnitString[] = "us";
constexpr char kNanosecondsUnitString[] = "ns";
| 76 | |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 77 | // TODO(hanchung): Extract the batch size using |
Ben Vanik | 1d60c18 | 2022-06-28 12:37:40 -0700 | [diff] [blame] | 78 | // iree_vm_function_lookup_attr_by_name. |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 79 | IREE_FLAG(int32_t, batch_size, 1, |
| 80 | "Number of invocations per iteration, which for dispatch benchmarks " |
| 81 | "must match the --iree-hal-benchmark-dispatch-repeat-count value " |
| 82 | "used during compilation."); |
| 83 | IREE_FLAG(int32_t, batch_concurrency, 1, |
| 84 | "Number of invocations within a batch that should run concurrently."); |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 85 | |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 86 | IREE_FLAG(string, function, "", |
| 87 | "Name of a function contained in the module specified by --module= " |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 88 | "to run. If this is not set, all the exported functions will be " |
| 89 | "benchmarked and they are expected to not have input arguments."); |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 90 | |
CindyLiu | d9609f2 | 2021-09-29 21:11:49 +0000 | [diff] [blame] | 91 | IREE_FLAG(bool, print_statistics, false, |
| 92 | "Prints runtime statistics to stderr on exit."); |
| 93 | |
Ben Vanik | 7958fc9 | 2023-01-12 08:45:32 -0800 | [diff] [blame] | 94 | IREE_FLAG_LIST( |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 95 | string, input, |
Ben Vanik | ebeb5fc | 2021-04-24 09:40:50 -0700 | [diff] [blame] | 96 | "An input value or buffer of the format:\n" |
| 97 | " [shape]xtype=[value]\n" |
| 98 | " 2x2xi32=1 2 3 4\n" |
| 99 | "Optionally, brackets may be used to separate the element values:\n" |
| 100 | " 2x2xi32=[[1 2][3 4]]\n" |
Ben Vanik | a30c840 | 2022-06-03 19:16:16 -0700 | [diff] [blame] | 101 | "Raw binary files can be read to provide buffer contents:\n" |
| 102 | " 2x2xi32=@some/file.bin\n" |
Ben Vanik | aecb725 | 2022-06-12 15:21:20 -0700 | [diff] [blame] | 103 | "numpy npy files (from numpy.save) can be read to provide 1+ values:\n" |
| 104 | " @some.npy\n" |
Ben Vanik | ebeb5fc | 2021-04-24 09:40:50 -0700 | [diff] [blame] | 105 | "Each occurrence of the flag indicates an input in the order they were\n" |
| 106 | "specified on the command line."); |
Han-Chung Wang | 19316be | 2020-07-17 05:53:02 -0700 | [diff] [blame] | 107 | |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 108 | static iree_status_t parse_time_unit(iree_string_view_t flag_name, |
| 109 | void* storage, iree_string_view_t value) { |
| 110 | auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage; |
| 111 | auto unit_string = std::string(value.data, value.size); |
| 112 | if (unit_string == kMillisecondsUnitString) { |
| 113 | *unit = {true, benchmark::kMillisecond}; |
| 114 | return iree_ok_status(); |
| 115 | } else if (unit_string == kMicrosecondsUnitString) { |
| 116 | *unit = {true, benchmark::kMicrosecond}; |
| 117 | return iree_ok_status(); |
| 118 | } else if (unit_string == kNanosecondsUnitString) { |
| 119 | *unit = {true, benchmark::kNanosecond}; |
| 120 | return iree_ok_status(); |
| 121 | } |
| 122 | return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, |
| 123 | "unsupported time unit"); |
| 124 | } |
| 125 | static void print_time_unit(iree_string_view_t flag_name, void* storage, |
| 126 | FILE* file) { |
| 127 | auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage; |
| 128 | if (!unit->first) { |
| 129 | return; |
| 130 | } |
| 131 | std::string unit_string; |
| 132 | switch (unit->second) { |
| 133 | case benchmark::kMillisecond: |
| 134 | unit_string = kMillisecondsUnitString; |
| 135 | break; |
| 136 | case benchmark::kMicrosecond: |
| 137 | unit_string = kMicrosecondsUnitString; |
| 138 | break; |
| 139 | case benchmark::kNanosecond: |
| 140 | unit_string = kNanosecondsUnitString; |
| 141 | break; |
| 142 | default: |
| 143 | assert(false && "Unexpected time unit."); |
| 144 | } |
| 145 | fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data, |
| 146 | unit_string.c_str()); |
| 147 | } |
| 148 | // Time unit to be printed. If the first field is false, each place will use its |
| 149 | // default time unit. |
| 150 | static std::pair<bool, benchmark::TimeUnit> FLAG_time_unit = { |
| 151 | false, benchmark::kNanosecond}; |
| 152 | IREE_FLAG_CALLBACK( |
| 153 | parse_time_unit, print_time_unit, &FLAG_time_unit, time_unit, |
| 154 | "The time unit to be printed in the results. Can be 'ms', 'us', or 'ns'."); |
| 155 | |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 156 | namespace iree { |
| 157 | namespace { |
| 158 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 159 | static void BenchmarkGenericFunction(const std::string& benchmark_name, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 160 | int32_t batch_size, |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 161 | iree_hal_device_t* device, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 162 | iree_vm_context_t* context, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 163 | iree_vm_function_t function, |
| 164 | iree_vm_list_t* inputs, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 165 | benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 166 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 167 | benchmark_name.size()); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 168 | IREE_TRACE_FRAME_MARK(); |
| 169 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 170 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 171 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 172 | iree_allocator_system(), &outputs)); |
| 173 | |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 174 | // Benchmarking loop. |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 175 | while (state.KeepRunningBatch(batch_size)) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 176 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 177 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
Ben Vanik | 89e9530 | 2021-10-05 17:05:39 -0700 | [diff] [blame] | 178 | IREE_CHECK_OK(iree_vm_invoke( |
| 179 | context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr, |
| 180 | inputs, outputs.get(), iree_allocator_system())); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 181 | IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0)); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 182 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 183 | if (device) { |
| 184 | state.PauseTiming(); |
| 185 | IREE_CHECK_OK(iree_hal_device_profiling_flush(device)); |
| 186 | state.ResumeTiming(); |
| 187 | } |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 188 | } |
Ben Vanik | b4ccbfc | 2022-08-30 15:43:41 -0700 | [diff] [blame] | 189 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 190 | |
| 191 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 192 | } |
| 193 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 194 | void RegisterGenericBenchmark(const std::string& function_name, |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 195 | iree_hal_device_t* device, |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 196 | iree_vm_context_t* context, |
| 197 | iree_vm_function_t function, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 198 | iree_vm_list_t* inputs) { |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 199 | auto benchmark_name = "BM_" + function_name; |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 200 | int32_t batch_size = FLAG_batch_size; |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 201 | benchmark::RegisterBenchmark(benchmark_name.c_str(), |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 202 | [=](benchmark::State& state) -> void { |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 203 | BenchmarkGenericFunction( |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 204 | benchmark_name, batch_size, device, |
| 205 | context, function, inputs, state); |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 206 | }) |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 207 | // By default only the main thread is included in CPU time. Include all |
| 208 | // the threads instead. |
| 209 | ->MeasureProcessCPUTime() |
| 210 | // To make single and multi-threaded benchmarks more comparable, use the |
| 211 | // wall time to determine how many iterations to run. See |
| 212 | // https://github.com/google/benchmark#cpu-timers, |
| 213 | ->UseRealTime() |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 214 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 215 | : benchmark::kMillisecond); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 216 | } |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 217 | |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 218 | // Runs up to |batch_size| pipelined invocations in sequence along with |
| 219 | // concurrency. Example: |
| 220 | // batch_size=1, concurrency=1: |
| 221 | // [invocation 0] |
| 222 | // batch_size=2, concurrency=1: |
| 223 | // [invocation 0] -> [invocation 1] |
| 224 | // batch_size=2, concurrency=2: |
| 225 | // [invocation 0] |
| 226 | // [invocation 1] |
| 227 | // batch_size=4, concurrency=2: |
| 228 | // [invocation 0] -> [invocation 2] |
| 229 | // [invocation 1] -> [invocation 3] |
| 230 | static void BenchmarkAsyncFunction( |
| 231 | const std::string& benchmark_name, int32_t batch_size, |
| 232 | int32_t batch_concurrency, iree_hal_device_t* device, |
| 233 | iree_vm_context_t* context, iree_vm_function_t function, |
| 234 | iree_vm_list_t* common_inputs, benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 235 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 236 | benchmark_name.size()); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 237 | IREE_TRACE_FRAME_MARK(); |
| 238 | iree_allocator_t host_allocator = iree_allocator_system(); |
| 239 | |
| 240 | // Round up batch size to some multiple of concurrency. |
| 241 | batch_size = (int32_t)iree_host_align(batch_size, batch_concurrency); |
| 242 | |
| 243 | // Benchmarking loop. |
| 244 | while (state.KeepRunningBatch(batch_size)) { |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 245 | state.PauseTiming(); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 246 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
| 247 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 248 | |
| 249 | IREE_TRACE_ZONE_BEGIN_NAMED(z_begin, "PrepareBatch"); |
| 250 | |
| 251 | // Each concurrent track of execution gets its own semaphore. |
| 252 | std::vector<vm::ref<iree_hal_semaphore_t>> timeline_semaphores; |
| 253 | for (int32_t i = 0; i < batch_concurrency; ++i) { |
| 254 | vm::ref<iree_hal_semaphore_t> timeline_semaphore; |
Ben Vanik | a28f76f | 2024-08-06 15:04:15 -0700 | [diff] [blame] | 255 | IREE_CHECK_OK(iree_hal_semaphore_create( |
| 256 | device, 0ull, IREE_HAL_SEMAPHORE_FLAG_NONE, &timeline_semaphore)); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 257 | timeline_semaphores.push_back(std::move(timeline_semaphore)); |
| 258 | } |
| 259 | |
| 260 | // Preallocate fences and I/O for each invocation. |
| 261 | // The same inputs are used for each but we need a unique list to hold the |
| 262 | // unique fences. Each fence represents when the invocation has completed. |
| 263 | std::vector<vm::ref<iree_hal_fence_t>> invocation_fences; |
| 264 | std::vector<vm::ref<iree_vm_list_t>> invocation_inputs; |
| 265 | std::vector<vm::ref<iree_vm_list_t>> invocation_outputs; |
| 266 | vm::ref<iree_hal_fence_t> completion_fence; |
| 267 | IREE_CHECK_OK(iree_hal_fence_create(batch_concurrency, host_allocator, |
| 268 | &completion_fence)); |
| 269 | for (int32_t i = 0; i < batch_size / batch_concurrency; ++i) { |
| 270 | for (int32_t j = 0; j < batch_concurrency; ++j) { |
| 271 | // Chain each concurrent minibatch to the previous. Note that to start |
| 272 | // we wait on nothing and begin executing immediately. |
| 273 | vm::ref<iree_hal_fence_t> wait_fence; |
| 274 | if (i > 0) { |
| 275 | wait_fence = vm::retain_ref( |
| 276 | invocation_fences[(i - 1) * batch_concurrency + j]); |
| 277 | } |
| 278 | uint64_t signal_value = i + 1; |
| 279 | vm::ref<iree_hal_fence_t> signal_fence; |
| 280 | IREE_CHECK_OK(iree_hal_fence_create_at(timeline_semaphores[j].get(), |
| 281 | signal_value, host_allocator, |
| 282 | &signal_fence)); |
| 283 | invocation_fences.push_back(vm::retain_ref(signal_fence)); |
| 284 | |
| 285 | // Join the final minibatch on the completion fence. |
| 286 | if (i == batch_size / batch_concurrency - 1) { |
| 287 | IREE_CHECK_OK(iree_hal_fence_insert(completion_fence.get(), |
| 288 | timeline_semaphores[j].get(), |
| 289 | signal_value)); |
| 290 | } |
| 291 | |
| 292 | // Clone common inputs and add the invocation-specific fences. |
| 293 | vm::ref<iree_vm_list_t> inputs; |
| 294 | IREE_CHECK_OK( |
| 295 | iree_vm_list_clone(common_inputs, host_allocator, &inputs)); |
| 296 | IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), wait_fence)); |
| 297 | IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), signal_fence)); |
| 298 | invocation_inputs.push_back(std::move(inputs)); |
| 299 | |
| 300 | // Setup empty outputs. |
| 301 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 302 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 303 | host_allocator, &outputs)); |
| 304 | invocation_outputs.push_back(std::move(outputs)); |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | IREE_TRACE_ZONE_END(z_begin); |
| 309 | |
| 310 | state.ResumeTiming(); |
| 311 | { |
| 312 | // TODO(benvanik): replace with async invocations. Today if the invocation |
| 313 | // performs any waits this will block on the initial invoke instead of |
| 314 | // actually overlapping things. |
| 315 | for (int32_t i = 0; i < batch_size; ++i) { |
| 316 | IREE_CHECK_OK( |
| 317 | iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE, |
| 318 | /*policy=*/nullptr, invocation_inputs[i].get(), |
| 319 | invocation_outputs[i].get(), host_allocator)); |
| 320 | } |
| 321 | IREE_CHECK_OK( |
| 322 | iree_hal_fence_wait(completion_fence.get(), iree_infinite_timeout())); |
| 323 | } |
| 324 | state.PauseTiming(); |
| 325 | |
| 326 | IREE_TRACE_ZONE_BEGIN_NAMED(z_end, "CleanupBatch"); |
| 327 | for (int32_t i = 0; i < batch_size; ++i) { |
| 328 | iree_vm_list_clear(invocation_outputs[i].get()); |
| 329 | } |
| 330 | invocation_fences.clear(); |
| 331 | invocation_inputs.clear(); |
| 332 | invocation_outputs.clear(); |
| 333 | completion_fence.reset(); |
| 334 | timeline_semaphores.clear(); |
| 335 | IREE_TRACE_ZONE_END(z_end); |
| 336 | |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 337 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 338 | if (device) { |
| 339 | IREE_CHECK_OK(iree_hal_device_profiling_flush(device)); |
| 340 | } |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 341 | state.ResumeTiming(); |
| 342 | } |
| 343 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 344 | |
| 345 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 346 | } |
| 347 | |
| 348 | void RegisterAsyncBenchmark(const std::string& function_name, |
| 349 | iree_hal_device_t* device, |
| 350 | iree_vm_context_t* context, |
| 351 | iree_vm_function_t function, |
| 352 | iree_vm_list_t* inputs) { |
| 353 | auto benchmark_name = "BM_" + function_name; |
| 354 | int32_t batch_size = FLAG_batch_size; |
| 355 | int32_t batch_concurrency = FLAG_batch_concurrency; |
| 356 | benchmark::RegisterBenchmark( |
| 357 | benchmark_name.c_str(), |
| 358 | [=](benchmark::State& state) -> void { |
| 359 | BenchmarkAsyncFunction(benchmark_name, batch_size, batch_concurrency, |
| 360 | device, context, function, inputs, state); |
| 361 | }) |
| 362 | // By default only the main thread is included in CPU time. Include all |
| 363 | // the threads instead. |
| 364 | ->MeasureProcessCPUTime() |
| 365 | // To make single and multi-threaded benchmarks more comparable, use the |
| 366 | // wall time to determine how many iterations to run. See |
| 367 | // https://github.com/google/benchmark#cpu-timers, |
| 368 | ->UseRealTime() |
| 369 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 370 | : benchmark::kMillisecond); |
| 371 | } |
| 372 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 373 | static void BenchmarkDispatchFunction(const std::string& benchmark_name, |
| 374 | iree_vm_context_t* context, |
| 375 | iree_vm_function_t function, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 376 | benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 377 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 378 | benchmark_name.size()); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 379 | IREE_TRACE_FRAME_MARK(); |
| 380 | |
| 381 | vm::ref<iree_vm_list_t> inputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 382 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 383 | iree_allocator_system(), &inputs)); |
| 384 | iree_vm_value_t batch_size = iree_vm_value_make_i32(FLAG_batch_size); |
| 385 | IREE_CHECK_OK(iree_vm_list_push_value(inputs.get(), &batch_size)); |
| 386 | |
| 387 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 388 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 389 | iree_allocator_system(), &outputs)); |
| 390 | |
| 391 | // Benchmarking loop. |
| 392 | while (state.KeepRunningBatch(FLAG_batch_size)) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 393 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 394 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
| 395 | IREE_CHECK_OK(iree_vm_invoke( |
| 396 | context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr, |
| 397 | inputs.get(), outputs.get(), iree_allocator_system())); |
| 398 | IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0)); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 399 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 400 | } |
Ben Vanik | b4ccbfc | 2022-08-30 15:43:41 -0700 | [diff] [blame] | 401 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 402 | |
| 403 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 404 | } |
| 405 | |
| 406 | void RegisterDispatchBenchmark(const std::string& function_name, |
| 407 | iree_vm_context_t* context, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 408 | iree_vm_function_t function) { |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 409 | auto benchmark_name = "BM_" + function_name; |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 410 | benchmark::RegisterBenchmark( |
| 411 | benchmark_name.c_str(), |
| 412 | [benchmark_name, context, function](benchmark::State& state) -> void { |
| 413 | BenchmarkDispatchFunction(benchmark_name, context, function, state); |
| 414 | }) |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 415 | // By default only the main thread is included in CPU time. Include all |
| 416 | // the threads instead. |
| 417 | ->MeasureProcessCPUTime() |
| 418 | // To make single and multi-threaded benchmarks more comparable, use the |
| 419 | // wall time to determine how many iterations to run. See |
| 420 | // https://github.com/google/benchmark#cpu-timers, |
| 421 | ->UseRealTime() |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 422 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 423 | : benchmark::kMicrosecond); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 424 | } |
| 425 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 426 | // The lifetime of IREEBenchmark should be as long as |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 427 | // ::benchmark::RunSpecifiedBenchmarks() where the resources are used during |
| 428 | // benchmarking. |
| 429 | class IREEBenchmark { |
| 430 | public: |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 431 | IREEBenchmark() { iree_tooling_module_list_initialize(&module_list_); } |
Ben Vanik | b20b602 | 2021-02-16 12:59:31 -0800 | [diff] [blame] | 432 | |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 433 | ~IREEBenchmark() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 434 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::dtor"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 435 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 436 | // Order matters. Tear down modules first to release resources. |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 437 | inputs_.reset(); |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 438 | context_.reset(); |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 439 | iree_tooling_module_list_reset(&module_list_); |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 440 | instance_.reset(); |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 441 | |
| 442 | // Tear down device last in order to get accurate statistics. |
Ben Vanik | 2b8438f | 2022-08-30 16:07:41 -0700 | [diff] [blame] | 443 | if (device_allocator_ && FLAG_print_statistics) { |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 444 | IREE_IGNORE_ERROR(iree_hal_allocator_statistics_fprint( |
| 445 | stderr, device_allocator_.get())); |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 446 | } |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 447 | device_allocator_.reset(); |
| 448 | device_.reset(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 449 | }; |
| 450 | |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 451 | iree_hal_device_t* device() const { return device_.get(); } |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 452 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 453 | iree_status_t Register() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 454 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Register"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 455 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 456 | if (!instance_ || !device_allocator_ || !context_ || !module_list_.count) { |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 457 | IREE_RETURN_IF_ERROR(Init()); |
| 458 | } |
| 459 | |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 460 | auto function_name = std::string(FLAG_function); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 461 | if (!function_name.empty()) { |
| 462 | IREE_RETURN_IF_ERROR(RegisterSpecificFunction(function_name)); |
| 463 | } else { |
| 464 | IREE_RETURN_IF_ERROR(RegisterAllExportedFunctions()); |
| 465 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 466 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 467 | } |
| 468 | |
| 469 | private: |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 470 | iree_status_t Init() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 471 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Init"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 472 | IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init"); |
| 473 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 474 | iree_allocator_t host_allocator = iree_allocator_system(); |
Ben Vanik | 35bc9a1 | 2022-03-09 09:05:58 -0800 | [diff] [blame] | 475 | IREE_RETURN_IF_ERROR( |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 476 | iree_tooling_create_instance(host_allocator, &instance_)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 477 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 478 | IREE_RETURN_IF_ERROR(iree_tooling_load_modules_from_flags( |
| 479 | instance_.get(), host_allocator, &module_list_)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 480 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 481 | IREE_RETURN_IF_ERROR(iree_tooling_create_context_from_flags( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 482 | instance_.get(), module_list_.count, module_list_.values, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 483 | /*default_device_uri=*/iree_string_view_empty(), host_allocator, |
| 484 | &context_, &device_, &device_allocator_)); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 485 | |
| 486 | IREE_TRACE_FRAME_MARK_END_NAMED("init"); |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 487 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 488 | } |
| 489 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 490 | iree_status_t RegisterSpecificFunction(const std::string& function_name) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 491 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterSpecificFunction"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 492 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 493 | iree_vm_module_t* main_module = |
| 494 | iree_tooling_module_list_back(&module_list_); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 495 | iree_vm_function_t function; |
Ben Vanik | b697e76 | 2022-06-15 12:07:58 -0700 | [diff] [blame] | 496 | IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_name( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 497 | main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, |
Scott Todd | 60b0764 | 2023-06-15 09:41:01 -0700 | [diff] [blame] | 498 | iree_string_view_t{function_name.data(), |
| 499 | (iree_host_size_t)function_name.size()}, |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 500 | &function)); |
Ben Vanik | 30901f5 | 2024-02-08 11:23:21 -0800 | [diff] [blame] | 501 | iree_vm_function_signature_t signature = |
| 502 | iree_vm_function_signature(&function); |
| 503 | iree_string_view_t arguments_cconv, results_cconv; |
| 504 | IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments( |
| 505 | &signature, &arguments_cconv, &results_cconv)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 506 | |
Ben Vanik | 30901f5 | 2024-02-08 11:23:21 -0800 | [diff] [blame] | 507 | IREE_CHECK_OK(iree_tooling_parse_variants( |
| 508 | arguments_cconv, FLAG_input_list(), device_.get(), |
| 509 | device_allocator_.get(), iree_vm_instance_allocator(instance_.get()), |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 510 | &inputs_)); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 511 | |
| 512 | iree_string_view_t invocation_model = iree_vm_function_lookup_attr_by_name( |
| 513 | &function, IREE_SV("iree.abi.model")); |
| 514 | if (iree_string_view_equal(invocation_model, IREE_SV("coarse-fences"))) { |
| 515 | // Asynchronous invocation. |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 516 | iree::RegisterAsyncBenchmark(function_name, device_.get(), context_.get(), |
| 517 | function, inputs_.get()); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 518 | } else { |
| 519 | // Synchronous invocation. |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 520 | iree::RegisterGenericBenchmark(function_name, device_.get(), |
| 521 | context_.get(), function, inputs_.get()); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 522 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 523 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 524 | } |
| 525 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 526 | iree_status_t RegisterAllExportedFunctions() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 527 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterAllExportedFunctions"); |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 528 | iree_vm_module_t* main_module = |
| 529 | iree_tooling_module_list_back(&module_list_); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 530 | iree_vm_module_signature_t signature = |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 531 | iree_vm_module_signature(main_module); |
Ben Vanik | 7f3a7e3 | 2020-11-14 14:16:07 -0800 | [diff] [blame] | 532 | for (iree_host_size_t i = 0; i < signature.export_function_count; ++i) { |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 533 | iree_vm_function_t function; |
| 534 | IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 535 | main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function)); |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 536 | iree_string_view_t function_name = iree_vm_function_name(&function); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 537 | |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 538 | // We run anything with the 'benchmark' attribute. |
| 539 | // If the attribute is not present we'll run anything that looks runnable. |
Ben Vanik | 1d60c18 | 2022-06-28 12:37:40 -0700 | [diff] [blame] | 540 | iree_string_view_t benchmark_type = iree_vm_function_lookup_attr_by_name( |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 541 | &function, IREE_SV("iree.benchmark")); |
| 542 | if (iree_string_view_equal(benchmark_type, IREE_SV("dispatch"))) { |
| 543 | iree::RegisterDispatchBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 544 | std::string(function_name.data, function_name.size), context_.get(), |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 545 | function); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 546 | } else if (iree_string_view_equal(benchmark_type, IREE_SV("entry"))) { |
| 547 | iree::RegisterGenericBenchmark( |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 548 | std::string(function_name.data, function_name.size), device_.get(), |
| 549 | context_.get(), function, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 550 | /*inputs=*/nullptr); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 551 | } else { |
| 552 | // Pick up generic () -> () functions. |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 553 | if (iree_string_view_starts_with(function_name, |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 554 | iree_make_cstring_view("__")) || |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 555 | iree_string_view_find_char(function_name, '$', 0) != |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 556 | IREE_STRING_VIEW_NPOS) { |
| 557 | // Skip internal or special functions. |
| 558 | continue; |
| 559 | } |
| 560 | |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 561 | // Query function information to determine how to run it. |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 562 | iree_vm_function_signature_t signature = |
| 563 | iree_vm_function_signature(&function); |
| 564 | iree_host_size_t argument_count = 0; |
| 565 | iree_host_size_t result_count = 0; |
| 566 | IREE_RETURN_IF_ERROR(iree_vm_function_call_count_arguments_and_results( |
| 567 | &signature, &argument_count, &result_count)); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 568 | iree_string_view_t invocation_model = |
| 569 | iree_vm_function_lookup_attr_by_name(&function, |
| 570 | IREE_SV("iree.abi.model")); |
| 571 | if (iree_string_view_equal(invocation_model, |
| 572 | IREE_SV("coarse-fences"))) { |
| 573 | // Asynchronous invocation with coarse fences. Expect just those. |
| 574 | if (argument_count == 2) { |
| 575 | // Only functions taking a (wait, signal) fence pair are run. |
| 576 | iree::RegisterAsyncBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 577 | std::string(function_name.data, function_name.size), |
| 578 | device_.get(), context_.get(), function, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 579 | /*inputs=*/nullptr); |
| 580 | } |
| 581 | } else { |
| 582 | // Basic synchronous invocation. |
| 583 | if (argument_count == 0) { |
| 584 | // Only functions with no inputs are run (because we can't pass |
| 585 | // anything). |
| 586 | iree::RegisterGenericBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 587 | std::string(function_name.data, function_name.size), |
Ben Vanik | 82be925 | 2023-08-25 11:12:18 -0700 | [diff] [blame] | 588 | device_.get(), context_.get(), function, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 589 | /*inputs=*/nullptr); |
| 590 | } |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 591 | } |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 592 | } |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 593 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 594 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 595 | } |
| 596 | |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 597 | iree::vm::ref<iree_vm_instance_t> instance_; |
| 598 | iree::vm::ref<iree_vm_context_t> context_; |
| 599 | iree::vm::ref<iree_hal_device_t> device_; |
| 600 | iree::vm::ref<iree_hal_allocator_t> device_allocator_; |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 601 | iree_tooling_module_list_t module_list_; |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 602 | iree::vm::ref<iree_vm_list_t> inputs_; |
| 603 | }; |
Ahmed S. Taei | 7a2f5ea | 2020-10-06 20:08:53 -0700 | [diff] [blame] | 604 | } // namespace |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 605 | } // namespace iree |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 606 | |
| 607 | int main(int argc, char** argv) { |
Ben Vanik | 7ed4f4b | 2023-06-14 13:33:54 -0700 | [diff] [blame] | 608 | IREE_TRACE_APP_ENTER(); |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame] | 609 | IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree-benchmark-module"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 610 | |
Ben Vanik | 1cb2f7a | 2021-04-26 16:32:53 -0700 | [diff] [blame] | 611 | // Pass through flags to benchmark (allowing --help to fall through). |
Stella Laurenzo | a2733b0 | 2023-11-08 13:22:50 -0800 | [diff] [blame] | 612 | iree_flags_set_usage( |
| 613 | "iree-benchmark-module", |
| 614 | "Benchmarks a function within a compiled IREE module and handles I/O\n" |
| 615 | "parsing. Modules can be provided by file path (`--module=file.vmfb`)\n" |
| 616 | "or read from stdin (`--module=-`) and the function to execute\n" |
| 617 | "matches the original name provided to the compiler\n" |
| 618 | "(`--function=foo` for `func.func @foo`).\n"); |
Ben Vanik | 1cb2f7a | 2021-04-26 16:32:53 -0700 | [diff] [blame] | 619 | iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK | |
| 620 | IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP, |
| 621 | &argc, &argv); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 622 | ::benchmark::Initialize(&argc, argv); |
Ben Vanik | 1cb2f7a | 2021-04-26 16:32:53 -0700 | [diff] [blame] | 623 | |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 624 | iree::IREEBenchmark iree_benchmark; |
Ben Vanik | 524c8e7 | 2021-05-01 15:48:44 -0700 | [diff] [blame] | 625 | iree_status_t status = iree_benchmark.Register(); |
| 626 | if (!iree_status_is_ok(status)) { |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame] | 627 | int exit_code = static_cast<int>(iree_status_code(status)); |
bjacob | 1cb92dd | 2022-09-26 16:21:02 +0000 | [diff] [blame] | 628 | printf("%s\n", iree::Status(std::move(status)).ToString().c_str()); |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame] | 629 | IREE_TRACE_ZONE_END(z0); |
| 630 | IREE_TRACE_APP_EXIT(exit_code); |
| 631 | return exit_code; |
Han-Chung Wang | bb9bcd3 | 2020-10-07 08:18:05 -0700 | [diff] [blame] | 632 | } |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 633 | IREE_CHECK_OK(iree_hal_begin_profiling_from_flags(iree_benchmark.device())); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 634 | ::benchmark::RunSpecifiedBenchmarks(); |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 635 | IREE_CHECK_OK(iree_hal_end_profiling_from_flags(iree_benchmark.device())); |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame] | 636 | |
| 637 | IREE_TRACE_ZONE_END(z0); |
| 638 | IREE_TRACE_APP_EXIT(EXIT_SUCCESS); |
| 639 | return EXIT_SUCCESS; |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 640 | } |