Geoffrey Martin-Noble | 552d3f8 | 2021-05-25 17:56:09 -0700 | [diff] [blame] | 1 | // Copyright 2020 The IREE Authors |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 2 | // |
Geoffrey Martin-Noble | 552d3f8 | 2021-05-25 17:56:09 -0700 | [diff] [blame] | 3 | // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 6 | |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 7 | //===----------------------------------------------------------------------===// |
| 8 | // iree-benchmark-module: benchmarks public functions in an IREE VM module |
| 9 | //===----------------------------------------------------------------------===// |
| 10 | // |
| 11 | // This runs exported functions using flags specified on the command line. |
| 12 | // Each function is measured independently and the numbers reported will be for |
| 13 | // the full end-to-end CPU and wall times. |
| 14 | // |
| 15 | // From an ML perspective this is an integration benchmark for measuring total |
| 16 | // user-visible latency of model entry points. It is *not* a microbenchmarking |
| 17 | // tool for individual device-side dispatch functions (aka ops aka kernels). |
| 18 | // If interested in the precise time of a particular dispatch then tracy, |
| 19 | // executable_library_benchmark, and platform/vendor tooling (nsight, perf, etc) |
| 20 | // are to be used instead and attaching them to this tool is often useful in |
| 21 | // order to get a large sample set. |
| 22 | // |
| 23 | // By default all functions taking no inputs will be benchmarked. If a function |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 24 | // takes inputs then the user will need to specify them using --input= |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 25 | // flags. Depending on the input program the -iree-flow-export-benchmark-funcs |
| 26 | // flag can be passed to the compiler to attempt to wrap each function with |
| 27 | // dummy inputs however this will fail in programs with dynamically shaped |
| 28 | // inputs. The workaround for avoiding the need for flags is to provide the |
| 29 | // input program in a form with no inputs from the start. |
| 30 | // |
| 31 | // It's important to remember that IREE is not a BLAS library and is meant to |
| 32 | // run entire programs. It's not generally appropriate to benchmark a model with |
| 33 | // a single matmul, for example, as that's just treating IREE as a BLAS library. |
| 34 | // Note also that user-level ops in a frontend environment don't map to the |
| 35 | // dispatches that IREE executes: IREE is a compiler like any other and does not |
| 36 | // guarantee a source line of code translates into an atomically divisible and |
| 37 | // independently measurable execution command. In other words don't expect to be |
| 38 | // able to benchmark the cost of a broadcasting elementwise tf.add op within a |
| 39 | // model: by the time we are running the program that's fused itself into a |
| 40 | // single machine instruction operating as part of some other ops. |
| 41 | // |
| 42 | // For coarse dispatch testing and triaging it can still be useful to remove |
| 43 | // some of the overheads introduced by whole-program execution and the compiler |
Scott Todd | 52f62b8 | 2022-05-10 17:51:34 -0700 | [diff] [blame] | 44 | // flag --iree-hal-benchmark-dispatch-repeat-count=N is provided to enable |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 45 | // batching. Whatever N is chosen must then be passed to this tool via |
| 46 | // --batch_size=N so that the benchmark reporting properly reflects the |
Scott Todd | 52f62b8 | 2022-05-10 17:51:34 -0700 | [diff] [blame] | 47 | // batching. As an example --iree-hal-benchmark-dispatch-repeat-count=32 + |
Ben Vanik | 3148a51 | 2022-04-06 12:58:06 -0700 | [diff] [blame] | 48 | // --batch_size=32 will reduce the overheads by 32x. Think of this as a way to |
| 49 | // control the p value in Amdahl's law representing the amount of time spent in |
| 50 | // dispatches relative to the rest of the program. This isn't representative of |
| 51 | // how the full program will run, though, and YMMV. Always verify timings with |
| 52 | // an appropriate device-specific tool before trusting the more generic and |
| 53 | // higher-level numbers from this tool. |
| 54 | |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 55 | #include <array> |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 56 | #include <cstdio> |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 57 | #include <iterator> |
| 58 | #include <string> |
| 59 | #include <type_traits> |
| 60 | #include <utility> |
| 61 | #include <vector> |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 62 | |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 63 | #include "benchmark/benchmark.h" |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 64 | #include "iree/base/api.h" |
Ben Vanik | e28d253 | 2021-02-03 13:44:24 -0800 | [diff] [blame] | 65 | #include "iree/base/internal/flags.h" |
Ben Vanik | 931a3b1 | 2021-05-20 13:27:13 -0700 | [diff] [blame] | 66 | #include "iree/hal/api.h" |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 67 | #include "iree/modules/hal/types.h" |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 68 | #include "iree/tooling/context_util.h" |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 69 | #include "iree/tooling/device_util.h" |
Ben Vanik | 7958fc9 | 2023-01-12 08:45:32 -0800 | [diff] [blame] | 70 | #include "iree/tooling/vm_util.h" |
Ben Vanik | e8a9ec4 | 2020-07-16 22:04:57 -0700 | [diff] [blame] | 71 | #include "iree/vm/api.h" |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 72 | |
// Spellings accepted by (and printed for) the --time_unit flag.
constexpr char kMillisecondsUnitString[] = "ms";
constexpr char kMicrosecondsUnitString[] = "us";
constexpr char kNanosecondsUnitString[] = "ns";
| 76 | |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 77 | // TODO(hanchung): Extract the batch size using |
Ben Vanik | 1d60c18 | 2022-06-28 12:37:40 -0700 | [diff] [blame] | 78 | // iree_vm_function_lookup_attr_by_name. |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 79 | IREE_FLAG(int32_t, batch_size, 1, |
| 80 | "Number of invocations per iteration, which for dispatch benchmarks " |
| 81 | "must match the --iree-hal-benchmark-dispatch-repeat-count value " |
| 82 | "used during compilation."); |
| 83 | IREE_FLAG(int32_t, batch_concurrency, 1, |
| 84 | "Number of invocations within a batch that should run concurrently."); |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 85 | |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 86 | IREE_FLAG(string, function, "", |
| 87 | "Name of a function contained in the module specified by --module= " |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 88 | "to run. If this is not set, all the exported functions will be " |
| 89 | "benchmarked and they are expected to not have input arguments."); |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 90 | |
CindyLiu | d9609f2 | 2021-09-29 21:11:49 +0000 | [diff] [blame] | 91 | IREE_FLAG(bool, print_statistics, false, |
| 92 | "Prints runtime statistics to stderr on exit."); |
| 93 | |
Ben Vanik | 7958fc9 | 2023-01-12 08:45:32 -0800 | [diff] [blame] | 94 | IREE_FLAG_LIST( |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 95 | string, input, |
Ben Vanik | ebeb5fc | 2021-04-24 09:40:50 -0700 | [diff] [blame] | 96 | "An input value or buffer of the format:\n" |
| 97 | " [shape]xtype=[value]\n" |
| 98 | " 2x2xi32=1 2 3 4\n" |
| 99 | "Optionally, brackets may be used to separate the element values:\n" |
| 100 | " 2x2xi32=[[1 2][3 4]]\n" |
Ben Vanik | a30c840 | 2022-06-03 19:16:16 -0700 | [diff] [blame] | 101 | "Raw binary files can be read to provide buffer contents:\n" |
| 102 | " 2x2xi32=@some/file.bin\n" |
Ben Vanik | aecb725 | 2022-06-12 15:21:20 -0700 | [diff] [blame] | 103 | "numpy npy files (from numpy.save) can be read to provide 1+ values:\n" |
| 104 | " @some.npy\n" |
Ben Vanik | ebeb5fc | 2021-04-24 09:40:50 -0700 | [diff] [blame] | 105 | "Each occurrence of the flag indicates an input in the order they were\n" |
| 106 | "specified on the command line."); |
Han-Chung Wang | 19316be | 2020-07-17 05:53:02 -0700 | [diff] [blame] | 107 | |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 108 | static iree_status_t parse_time_unit(iree_string_view_t flag_name, |
| 109 | void* storage, iree_string_view_t value) { |
| 110 | auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage; |
| 111 | auto unit_string = std::string(value.data, value.size); |
| 112 | if (unit_string == kMillisecondsUnitString) { |
| 113 | *unit = {true, benchmark::kMillisecond}; |
| 114 | return iree_ok_status(); |
| 115 | } else if (unit_string == kMicrosecondsUnitString) { |
| 116 | *unit = {true, benchmark::kMicrosecond}; |
| 117 | return iree_ok_status(); |
| 118 | } else if (unit_string == kNanosecondsUnitString) { |
| 119 | *unit = {true, benchmark::kNanosecond}; |
| 120 | return iree_ok_status(); |
| 121 | } |
| 122 | return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, |
| 123 | "unsupported time unit"); |
| 124 | } |
| 125 | static void print_time_unit(iree_string_view_t flag_name, void* storage, |
| 126 | FILE* file) { |
| 127 | auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage; |
| 128 | if (!unit->first) { |
| 129 | return; |
| 130 | } |
| 131 | std::string unit_string; |
| 132 | switch (unit->second) { |
| 133 | case benchmark::kMillisecond: |
| 134 | unit_string = kMillisecondsUnitString; |
| 135 | break; |
| 136 | case benchmark::kMicrosecond: |
| 137 | unit_string = kMicrosecondsUnitString; |
| 138 | break; |
| 139 | case benchmark::kNanosecond: |
| 140 | unit_string = kNanosecondsUnitString; |
| 141 | break; |
| 142 | default: |
| 143 | assert(false && "Unexpected time unit."); |
| 144 | } |
| 145 | fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data, |
| 146 | unit_string.c_str()); |
| 147 | } |
| 148 | // Time unit to be printed. If the first field is false, each place will use its |
| 149 | // default time unit. |
| 150 | static std::pair<bool, benchmark::TimeUnit> FLAG_time_unit = { |
| 151 | false, benchmark::kNanosecond}; |
| 152 | IREE_FLAG_CALLBACK( |
| 153 | parse_time_unit, print_time_unit, &FLAG_time_unit, time_unit, |
| 154 | "The time unit to be printed in the results. Can be 'ms', 'us', or 'ns'."); |
| 155 | |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 156 | namespace iree { |
| 157 | namespace { |
| 158 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 159 | static void BenchmarkGenericFunction(const std::string& benchmark_name, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 160 | int32_t batch_size, |
| 161 | iree_vm_context_t* context, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 162 | iree_vm_function_t function, |
| 163 | iree_vm_list_t* inputs, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 164 | benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 165 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 166 | benchmark_name.size()); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 167 | IREE_TRACE_FRAME_MARK(); |
| 168 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 169 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 170 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 171 | iree_allocator_system(), &outputs)); |
| 172 | |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 173 | // Benchmarking loop. |
Han-Chung Wang | a43dd17 | 2021-03-11 02:44:05 +0800 | [diff] [blame] | 174 | while (state.KeepRunningBatch(batch_size)) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 175 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 176 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
Ben Vanik | 89e9530 | 2021-10-05 17:05:39 -0700 | [diff] [blame] | 177 | IREE_CHECK_OK(iree_vm_invoke( |
| 178 | context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr, |
| 179 | inputs, outputs.get(), iree_allocator_system())); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 180 | IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0)); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 181 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 182 | } |
Ben Vanik | b4ccbfc | 2022-08-30 15:43:41 -0700 | [diff] [blame] | 183 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 184 | |
| 185 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 186 | } |
| 187 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 188 | void RegisterGenericBenchmark(const std::string& function_name, |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 189 | iree_vm_context_t* context, |
| 190 | iree_vm_function_t function, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 191 | iree_vm_list_t* inputs) { |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 192 | auto benchmark_name = "BM_" + function_name; |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 193 | int32_t batch_size = FLAG_batch_size; |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 194 | benchmark::RegisterBenchmark(benchmark_name.c_str(), |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 195 | [=](benchmark::State& state) -> void { |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 196 | BenchmarkGenericFunction( |
| 197 | benchmark_name, batch_size, context, |
| 198 | function, inputs, state); |
| 199 | }) |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 200 | // By default only the main thread is included in CPU time. Include all |
| 201 | // the threads instead. |
| 202 | ->MeasureProcessCPUTime() |
| 203 | // To make single and multi-threaded benchmarks more comparable, use the |
| 204 | // wall time to determine how many iterations to run. See |
| 205 | // https://github.com/google/benchmark#cpu-timers, |
| 206 | ->UseRealTime() |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 207 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 208 | : benchmark::kMillisecond); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 209 | } |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 210 | |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 211 | // Runs up to |batch_size| pipelined invocations in sequence along with |
| 212 | // concurrency. Example: |
| 213 | // batch_size=1, concurrency=1: |
| 214 | // [invocation 0] |
| 215 | // batch_size=2, concurrency=1: |
| 216 | // [invocation 0] -> [invocation 1] |
| 217 | // batch_size=2, concurrency=2: |
| 218 | // [invocation 0] |
| 219 | // [invocation 1] |
| 220 | // batch_size=4, concurrency=2: |
| 221 | // [invocation 0] -> [invocation 2] |
| 222 | // [invocation 1] -> [invocation 3] |
| 223 | static void BenchmarkAsyncFunction( |
| 224 | const std::string& benchmark_name, int32_t batch_size, |
| 225 | int32_t batch_concurrency, iree_hal_device_t* device, |
| 226 | iree_vm_context_t* context, iree_vm_function_t function, |
| 227 | iree_vm_list_t* common_inputs, benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 228 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 229 | benchmark_name.size()); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 230 | IREE_TRACE_FRAME_MARK(); |
| 231 | iree_allocator_t host_allocator = iree_allocator_system(); |
| 232 | |
| 233 | // Round up batch size to some multiple of concurrency. |
| 234 | batch_size = (int32_t)iree_host_align(batch_size, batch_concurrency); |
| 235 | |
| 236 | // Benchmarking loop. |
| 237 | while (state.KeepRunningBatch(batch_size)) { |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 238 | state.PauseTiming(); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 239 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
| 240 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 241 | |
| 242 | IREE_TRACE_ZONE_BEGIN_NAMED(z_begin, "PrepareBatch"); |
| 243 | |
| 244 | // Each concurrent track of execution gets its own semaphore. |
| 245 | std::vector<vm::ref<iree_hal_semaphore_t>> timeline_semaphores; |
| 246 | for (int32_t i = 0; i < batch_concurrency; ++i) { |
| 247 | vm::ref<iree_hal_semaphore_t> timeline_semaphore; |
| 248 | IREE_CHECK_OK( |
| 249 | iree_hal_semaphore_create(device, 0ull, &timeline_semaphore)); |
| 250 | timeline_semaphores.push_back(std::move(timeline_semaphore)); |
| 251 | } |
| 252 | |
| 253 | // Preallocate fences and I/O for each invocation. |
| 254 | // The same inputs are used for each but we need a unique list to hold the |
| 255 | // unique fences. Each fence represents when the invocation has completed. |
| 256 | std::vector<vm::ref<iree_hal_fence_t>> invocation_fences; |
| 257 | std::vector<vm::ref<iree_vm_list_t>> invocation_inputs; |
| 258 | std::vector<vm::ref<iree_vm_list_t>> invocation_outputs; |
| 259 | vm::ref<iree_hal_fence_t> completion_fence; |
| 260 | IREE_CHECK_OK(iree_hal_fence_create(batch_concurrency, host_allocator, |
| 261 | &completion_fence)); |
| 262 | for (int32_t i = 0; i < batch_size / batch_concurrency; ++i) { |
| 263 | for (int32_t j = 0; j < batch_concurrency; ++j) { |
| 264 | // Chain each concurrent minibatch to the previous. Note that to start |
| 265 | // we wait on nothing and begin executing immediately. |
| 266 | vm::ref<iree_hal_fence_t> wait_fence; |
| 267 | if (i > 0) { |
| 268 | wait_fence = vm::retain_ref( |
| 269 | invocation_fences[(i - 1) * batch_concurrency + j]); |
| 270 | } |
| 271 | uint64_t signal_value = i + 1; |
| 272 | vm::ref<iree_hal_fence_t> signal_fence; |
| 273 | IREE_CHECK_OK(iree_hal_fence_create_at(timeline_semaphores[j].get(), |
| 274 | signal_value, host_allocator, |
| 275 | &signal_fence)); |
| 276 | invocation_fences.push_back(vm::retain_ref(signal_fence)); |
| 277 | |
| 278 | // Join the final minibatch on the completion fence. |
| 279 | if (i == batch_size / batch_concurrency - 1) { |
| 280 | IREE_CHECK_OK(iree_hal_fence_insert(completion_fence.get(), |
| 281 | timeline_semaphores[j].get(), |
| 282 | signal_value)); |
| 283 | } |
| 284 | |
| 285 | // Clone common inputs and add the invocation-specific fences. |
| 286 | vm::ref<iree_vm_list_t> inputs; |
| 287 | IREE_CHECK_OK( |
| 288 | iree_vm_list_clone(common_inputs, host_allocator, &inputs)); |
| 289 | IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), wait_fence)); |
| 290 | IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), signal_fence)); |
| 291 | invocation_inputs.push_back(std::move(inputs)); |
| 292 | |
| 293 | // Setup empty outputs. |
| 294 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 295 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 296 | host_allocator, &outputs)); |
| 297 | invocation_outputs.push_back(std::move(outputs)); |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | IREE_TRACE_ZONE_END(z_begin); |
| 302 | |
| 303 | state.ResumeTiming(); |
| 304 | { |
| 305 | // TODO(benvanik): replace with async invocations. Today if the invocation |
| 306 | // performs any waits this will block on the initial invoke instead of |
| 307 | // actually overlapping things. |
| 308 | for (int32_t i = 0; i < batch_size; ++i) { |
| 309 | IREE_CHECK_OK( |
| 310 | iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE, |
| 311 | /*policy=*/nullptr, invocation_inputs[i].get(), |
| 312 | invocation_outputs[i].get(), host_allocator)); |
| 313 | } |
| 314 | IREE_CHECK_OK( |
| 315 | iree_hal_fence_wait(completion_fence.get(), iree_infinite_timeout())); |
| 316 | } |
| 317 | state.PauseTiming(); |
| 318 | |
| 319 | IREE_TRACE_ZONE_BEGIN_NAMED(z_end, "CleanupBatch"); |
| 320 | for (int32_t i = 0; i < batch_size; ++i) { |
| 321 | iree_vm_list_clear(invocation_outputs[i].get()); |
| 322 | } |
| 323 | invocation_fences.clear(); |
| 324 | invocation_inputs.clear(); |
| 325 | invocation_outputs.clear(); |
| 326 | completion_fence.reset(); |
| 327 | timeline_semaphores.clear(); |
| 328 | IREE_TRACE_ZONE_END(z_end); |
| 329 | |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 330 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 331 | state.ResumeTiming(); |
| 332 | } |
| 333 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 334 | |
| 335 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 336 | } |
| 337 | |
| 338 | void RegisterAsyncBenchmark(const std::string& function_name, |
| 339 | iree_hal_device_t* device, |
| 340 | iree_vm_context_t* context, |
| 341 | iree_vm_function_t function, |
| 342 | iree_vm_list_t* inputs) { |
| 343 | auto benchmark_name = "BM_" + function_name; |
| 344 | int32_t batch_size = FLAG_batch_size; |
| 345 | int32_t batch_concurrency = FLAG_batch_concurrency; |
| 346 | benchmark::RegisterBenchmark( |
| 347 | benchmark_name.c_str(), |
| 348 | [=](benchmark::State& state) -> void { |
| 349 | BenchmarkAsyncFunction(benchmark_name, batch_size, batch_concurrency, |
| 350 | device, context, function, inputs, state); |
| 351 | }) |
| 352 | // By default only the main thread is included in CPU time. Include all |
| 353 | // the threads instead. |
| 354 | ->MeasureProcessCPUTime() |
| 355 | // To make single and multi-threaded benchmarks more comparable, use the |
| 356 | // wall time to determine how many iterations to run. See |
| 357 | // https://github.com/google/benchmark#cpu-timers, |
| 358 | ->UseRealTime() |
| 359 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 360 | : benchmark::kMillisecond); |
| 361 | } |
| 362 | |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 363 | static void BenchmarkDispatchFunction(const std::string& benchmark_name, |
| 364 | iree_vm_context_t* context, |
| 365 | iree_vm_function_t function, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 366 | benchmark::State& state) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 367 | IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(), |
| 368 | benchmark_name.size()); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 369 | IREE_TRACE_FRAME_MARK(); |
| 370 | |
| 371 | vm::ref<iree_vm_list_t> inputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 372 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 373 | iree_allocator_system(), &inputs)); |
| 374 | iree_vm_value_t batch_size = iree_vm_value_make_i32(FLAG_batch_size); |
| 375 | IREE_CHECK_OK(iree_vm_list_push_value(inputs.get(), &batch_size)); |
| 376 | |
| 377 | vm::ref<iree_vm_list_t> outputs; |
Ben Vanik | 09630d6 | 2023-04-13 14:21:40 -0700 | [diff] [blame] | 378 | IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16, |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 379 | iree_allocator_system(), &outputs)); |
| 380 | |
| 381 | // Benchmarking loop. |
| 382 | while (state.KeepRunningBatch(FLAG_batch_size)) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 383 | IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration"); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 384 | IREE_TRACE_FRAME_MARK_NAMED("Iteration"); |
| 385 | IREE_CHECK_OK(iree_vm_invoke( |
| 386 | context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr, |
| 387 | inputs.get(), outputs.get(), iree_allocator_system())); |
| 388 | IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0)); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 389 | IREE_TRACE_ZONE_END(z1); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 390 | } |
Ben Vanik | b4ccbfc | 2022-08-30 15:43:41 -0700 | [diff] [blame] | 391 | state.SetItemsProcessed(state.iterations()); |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 392 | |
| 393 | IREE_TRACE_ZONE_END(z0); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 394 | } |
| 395 | |
| 396 | void RegisterDispatchBenchmark(const std::string& function_name, |
| 397 | iree_vm_context_t* context, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 398 | iree_vm_function_t function) { |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 399 | auto benchmark_name = "BM_" + function_name; |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 400 | benchmark::RegisterBenchmark( |
| 401 | benchmark_name.c_str(), |
| 402 | [benchmark_name, context, function](benchmark::State& state) -> void { |
| 403 | BenchmarkDispatchFunction(benchmark_name, context, function, state); |
| 404 | }) |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 405 | // By default only the main thread is included in CPU time. Include all |
| 406 | // the threads instead. |
| 407 | ->MeasureProcessCPUTime() |
| 408 | // To make single and multi-threaded benchmarks more comparable, use the |
| 409 | // wall time to determine how many iterations to run. See |
| 410 | // https://github.com/google/benchmark#cpu-timers, |
| 411 | ->UseRealTime() |
Jerry Wu | 1ebcce3 | 2022-07-26 12:31:14 -0400 | [diff] [blame] | 412 | ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second |
| 413 | : benchmark::kMicrosecond); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 414 | } |
| 415 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 416 | // The lifetime of IREEBenchmark should be as long as |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 417 | // ::benchmark::RunSpecifiedBenchmarks() where the resources are used during |
| 418 | // benchmarking. |
| 419 | class IREEBenchmark { |
| 420 | public: |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 421 | IREEBenchmark() { iree_tooling_module_list_initialize(&module_list_); } |
Ben Vanik | b20b602 | 2021-02-16 12:59:31 -0800 | [diff] [blame] | 422 | |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 423 | ~IREEBenchmark() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 424 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::dtor"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 425 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 426 | // Order matters. Tear down modules first to release resources. |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 427 | inputs_.reset(); |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 428 | context_.reset(); |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 429 | iree_tooling_module_list_reset(&module_list_); |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 430 | instance_.reset(); |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 431 | |
| 432 | // Tear down device last in order to get accurate statistics. |
Ben Vanik | 2b8438f | 2022-08-30 16:07:41 -0700 | [diff] [blame] | 433 | if (device_allocator_ && FLAG_print_statistics) { |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 434 | IREE_IGNORE_ERROR(iree_hal_allocator_statistics_fprint( |
| 435 | stderr, device_allocator_.get())); |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 436 | } |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 437 | device_allocator_.reset(); |
| 438 | device_.reset(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 439 | }; |
| 440 | |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 441 | iree_hal_device_t* device() const { return device_.get(); } |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 442 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 443 | iree_status_t Register() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 444 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Register"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 445 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 446 | if (!instance_ || !device_allocator_ || !context_ || !module_list_.count) { |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 447 | IREE_RETURN_IF_ERROR(Init()); |
| 448 | } |
| 449 | |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 450 | auto function_name = std::string(FLAG_function); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 451 | if (!function_name.empty()) { |
| 452 | IREE_RETURN_IF_ERROR(RegisterSpecificFunction(function_name)); |
| 453 | } else { |
| 454 | IREE_RETURN_IF_ERROR(RegisterAllExportedFunctions()); |
| 455 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 456 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 457 | } |
| 458 | |
| 459 | private: |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 460 | iree_status_t Init() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 461 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Init"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 462 | IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init"); |
| 463 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 464 | iree_allocator_t host_allocator = iree_allocator_system(); |
Ben Vanik | 35bc9a1 | 2022-03-09 09:05:58 -0800 | [diff] [blame] | 465 | IREE_RETURN_IF_ERROR( |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 466 | iree_tooling_create_instance(host_allocator, &instance_)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 467 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 468 | IREE_RETURN_IF_ERROR(iree_tooling_load_modules_from_flags( |
| 469 | instance_.get(), host_allocator, &module_list_)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 470 | |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 471 | IREE_RETURN_IF_ERROR(iree_tooling_create_context_from_flags( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 472 | instance_.get(), module_list_.count, module_list_.values, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 473 | /*default_device_uri=*/iree_string_view_empty(), host_allocator, |
| 474 | &context_, &device_, &device_allocator_)); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 475 | |
| 476 | IREE_TRACE_FRAME_MARK_END_NAMED("init"); |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 477 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 478 | } |
| 479 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 480 | iree_status_t RegisterSpecificFunction(const std::string& function_name) { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 481 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterSpecificFunction"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 482 | |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 483 | iree_vm_module_t* main_module = |
| 484 | iree_tooling_module_list_back(&module_list_); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 485 | iree_vm_function_t function; |
Ben Vanik | b697e76 | 2022-06-15 12:07:58 -0700 | [diff] [blame] | 486 | IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_name( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 487 | main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 488 | iree_string_view_t{function_name.data(), function_name.size()}, |
| 489 | &function)); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 490 | |
Ben Vanik | 7958fc9 | 2023-01-12 08:45:32 -0800 | [diff] [blame] | 491 | IREE_CHECK_OK(iree_tooling_parse_to_variant_list( |
Ben Vanik | f65c5cb | 2023-02-01 11:02:10 -0800 | [diff] [blame] | 492 | device_allocator_.get(), FLAG_input_list().values, |
| 493 | FLAG_input_list().count, iree_vm_instance_allocator(instance_.get()), |
| 494 | &inputs_)); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 495 | |
| 496 | iree_string_view_t invocation_model = iree_vm_function_lookup_attr_by_name( |
| 497 | &function, IREE_SV("iree.abi.model")); |
| 498 | if (iree_string_view_equal(invocation_model, IREE_SV("coarse-fences"))) { |
| 499 | // Asynchronous invocation. |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 500 | iree::RegisterAsyncBenchmark(function_name, device_.get(), context_.get(), |
| 501 | function, inputs_.get()); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 502 | } else { |
| 503 | // Synchronous invocation. |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 504 | iree::RegisterGenericBenchmark(function_name, context_.get(), function, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 505 | inputs_.get()); |
| 506 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 507 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 508 | } |
| 509 | |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 510 | iree_status_t RegisterAllExportedFunctions() { |
Ben Vanik | cc43680 | 2023-06-10 08:53:52 -0700 | [diff] [blame] | 511 | IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterAllExportedFunctions"); |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 512 | iree_vm_module_t* main_module = |
| 513 | iree_tooling_module_list_back(&module_list_); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 514 | iree_vm_module_signature_t signature = |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 515 | iree_vm_module_signature(main_module); |
Ben Vanik | 7f3a7e3 | 2020-11-14 14:16:07 -0800 | [diff] [blame] | 516 | for (iree_host_size_t i = 0; i < signature.export_function_count; ++i) { |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 517 | iree_vm_function_t function; |
| 518 | IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal( |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 519 | main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function)); |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 520 | iree_string_view_t function_name = iree_vm_function_name(&function); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 521 | |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 522 | // We run anything with the 'benchmark' attribute. |
| 523 | // If the attribute is not present we'll run anything that looks runnable. |
Ben Vanik | 1d60c18 | 2022-06-28 12:37:40 -0700 | [diff] [blame] | 524 | iree_string_view_t benchmark_type = iree_vm_function_lookup_attr_by_name( |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 525 | &function, IREE_SV("iree.benchmark")); |
| 526 | if (iree_string_view_equal(benchmark_type, IREE_SV("dispatch"))) { |
| 527 | iree::RegisterDispatchBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 528 | std::string(function_name.data, function_name.size), context_.get(), |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 529 | function); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 530 | } else if (iree_string_view_equal(benchmark_type, IREE_SV("entry"))) { |
| 531 | iree::RegisterGenericBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 532 | std::string(function_name.data, function_name.size), context_.get(), |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 533 | function, |
Ben Vanik | 007109f | 2022-08-03 07:26:50 -0700 | [diff] [blame] | 534 | /*inputs=*/nullptr); |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 535 | } else { |
| 536 | // Pick up generic () -> () functions. |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 537 | if (iree_string_view_starts_with(function_name, |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 538 | iree_make_cstring_view("__")) || |
Ben Vanik | 6c4dd5b | 2021-10-05 15:29:23 -0700 | [diff] [blame] | 539 | iree_string_view_find_char(function_name, '$', 0) != |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 540 | IREE_STRING_VIEW_NPOS) { |
| 541 | // Skip internal or special functions. |
| 542 | continue; |
| 543 | } |
| 544 | |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 545 | // Query function information to determine how to run it. |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 546 | iree_vm_function_signature_t signature = |
| 547 | iree_vm_function_signature(&function); |
| 548 | iree_host_size_t argument_count = 0; |
| 549 | iree_host_size_t result_count = 0; |
| 550 | IREE_RETURN_IF_ERROR(iree_vm_function_call_count_arguments_and_results( |
| 551 | &signature, &argument_count, &result_count)); |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 552 | iree_string_view_t invocation_model = |
| 553 | iree_vm_function_lookup_attr_by_name(&function, |
| 554 | IREE_SV("iree.abi.model")); |
| 555 | if (iree_string_view_equal(invocation_model, |
| 556 | IREE_SV("coarse-fences"))) { |
| 557 | // Asynchronous invocation with coarse fences. Expect just those. |
| 558 | if (argument_count == 2) { |
| 559 | // Only functions taking a (wait, signal) fence pair are run. |
| 560 | iree::RegisterAsyncBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 561 | std::string(function_name.data, function_name.size), |
| 562 | device_.get(), context_.get(), function, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 563 | /*inputs=*/nullptr); |
| 564 | } |
| 565 | } else { |
| 566 | // Basic synchronous invocation. |
| 567 | if (argument_count == 0) { |
| 568 | // Only functions with no inputs are run (because we can't pass |
| 569 | // anything). |
| 570 | iree::RegisterGenericBenchmark( |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 571 | std::string(function_name.data, function_name.size), |
| 572 | context_.get(), function, |
Ben Vanik | e9ae963 | 2022-10-04 08:13:30 -0700 | [diff] [blame] | 573 | /*inputs=*/nullptr); |
| 574 | } |
Ben Vanik | 5a58aa4 | 2021-05-07 12:46:29 -0700 | [diff] [blame] | 575 | } |
Ben Vanik | f8e11fd | 2022-04-12 09:43:41 -0700 | [diff] [blame] | 576 | } |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 577 | } |
Ben Vanik | 5a26619 | 2021-05-01 15:22:06 -0700 | [diff] [blame] | 578 | return iree_ok_status(); |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 579 | } |
| 580 | |
Ben Vanik | c149d61 | 2022-11-09 01:07:17 +0000 | [diff] [blame] | 581 | iree::vm::ref<iree_vm_instance_t> instance_; |
| 582 | iree::vm::ref<iree_vm_context_t> context_; |
| 583 | iree::vm::ref<iree_hal_device_t> device_; |
| 584 | iree::vm::ref<iree_hal_allocator_t> device_allocator_; |
Ben Vanik | 9461d3b | 2023-04-18 16:39:25 -0700 | [diff] [blame] | 585 | iree_tooling_module_list_t module_list_; |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 586 | iree::vm::ref<iree_vm_list_t> inputs_; |
| 587 | }; |
Ahmed S. Taei | 7a2f5ea | 2020-10-06 20:08:53 -0700 | [diff] [blame] | 588 | } // namespace |
Geoffrey Martin-Noble | 3419026 | 2020-02-06 10:43:58 -0800 | [diff] [blame] | 589 | } // namespace iree |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 590 | |
| 591 | int main(int argc, char** argv) { |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame^] | 592 | IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree-benchmark-module"); |
Ben Vanik | 11c051a | 2020-10-21 09:58:22 -0700 | [diff] [blame] | 593 | |
Ben Vanik | 1cb2f7a | 2021-04-26 16:32:53 -0700 | [diff] [blame] | 594 | // Pass through flags to benchmark (allowing --help to fall through). |
| 595 | iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK | |
| 596 | IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP, |
| 597 | &argc, &argv); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 598 | ::benchmark::Initialize(&argc, argv); |
Ben Vanik | 1cb2f7a | 2021-04-26 16:32:53 -0700 | [diff] [blame] | 599 | |
Han-Chung Wang | 00aa2fc | 2020-10-12 02:05:39 -0700 | [diff] [blame] | 600 | iree::IREEBenchmark iree_benchmark; |
Ben Vanik | 524c8e7 | 2021-05-01 15:48:44 -0700 | [diff] [blame] | 601 | iree_status_t status = iree_benchmark.Register(); |
| 602 | if (!iree_status_is_ok(status)) { |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame^] | 603 | int exit_code = static_cast<int>(iree_status_code(status)); |
bjacob | 1cb92dd | 2022-09-26 16:21:02 +0000 | [diff] [blame] | 604 | printf("%s\n", iree::Status(std::move(status)).ToString().c_str()); |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame^] | 605 | IREE_TRACE_ZONE_END(z0); |
| 606 | IREE_TRACE_APP_EXIT(exit_code); |
| 607 | return exit_code; |
Han-Chung Wang | bb9bcd3 | 2020-10-07 08:18:05 -0700 | [diff] [blame] | 608 | } |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 609 | IREE_CHECK_OK(iree_hal_begin_profiling_from_flags(iree_benchmark.device())); |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 610 | ::benchmark::RunSpecifiedBenchmarks(); |
Ben Vanik | 7859d63 | 2022-10-24 14:37:28 -0700 | [diff] [blame] | 611 | IREE_CHECK_OK(iree_hal_end_profiling_from_flags(iree_benchmark.device())); |
Ben Vanik | 14308b1 | 2023-06-13 10:22:28 -0700 | [diff] [blame^] | 612 | |
| 613 | IREE_TRACE_ZONE_END(z0); |
| 614 | IREE_TRACE_APP_EXIT(EXIT_SUCCESS); |
| 615 | return EXIT_SUCCESS; |
Ahmed S. Taei | f1678df | 2020-08-26 20:38:50 -0700 | [diff] [blame] | 616 | } |