// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//===----------------------------------------------------------------------===//
// iree-benchmark-module: benchmarks public functions in an IREE VM module
//===----------------------------------------------------------------------===//
//
// This runs exported functions using flags specified on the command line.
// Each function is measured independently and the numbers reported will be for
// the full end-to-end CPU and wall times.
//
// From an ML perspective this is an integration benchmark for measuring total
// user-visible latency of model entry points. It is *not* a microbenchmarking
// tool for individual device-side dispatch functions (aka ops aka kernels).
// If interested in the precise time of a particular dispatch then tracy,
// executable_library_benchmark, and platform/vendor tooling (nsight, perf, etc)
// are to be used instead and attaching them to this tool is often useful in
// order to get a large sample set.
//
// By default all functions taking no inputs will be benchmarked. If a function
// takes inputs then the user will need to specify them using --input=
// flags. Depending on the input program the --iree-flow-export-benchmark-funcs
// flag can be passed to the compiler to attempt to wrap each function with
// dummy inputs however this will fail in programs with dynamically shaped
// inputs. The workaround for avoiding the need for flags is to provide the
// input program in a form with no inputs from the start.
//
// It's important to remember that IREE is not a BLAS library and is meant to
// run entire programs. It's not generally appropriate to benchmark a model with
// a single matmul, for example, as that's just treating IREE as a BLAS library.
// Note also that user-level ops in a frontend environment don't map to the
// dispatches that IREE executes: IREE is a compiler like any other and does not
// guarantee a source line of code translates into an atomically divisible and
// independently measurable execution command. In other words don't expect to be
// able to benchmark the cost of a broadcasting elementwise tf.add op within a
// model: by the time we are running the program that's fused itself into a
// single machine instruction operating as part of some other ops.
//
// For coarse dispatch testing and triaging it can still be useful to remove
// some of the overheads introduced by whole-program execution and the compiler
// flag --iree-hal-benchmark-dispatch-repeat-count=N is provided to enable
// batching. Whatever N is chosen must then be passed to this tool via
// --batch_size=N so that the benchmark reporting properly reflects the
// batching. As an example --iree-hal-benchmark-dispatch-repeat-count=32 +
// --batch_size=32 will reduce the overheads by 32x. Think of this as a way to
// control the p value in Amdahl's law representing the amount of time spent in
// dispatches relative to the rest of the program. This isn't representative of
// how the full program will run, though, and YMMV. Always verify timings with
// an appropriate device-specific tool before trusting the more generic and
// higher-level numbers from this tool.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iterator>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "iree/base/api.h"
#include "iree/base/internal/flags.h"
#include "iree/hal/api.h"
#include "iree/modules/hal/types.h"
#include "iree/tooling/context_util.h"
#include "iree/tooling/device_util.h"
#include "iree/tooling/vm_util.h"
#include "iree/vm/api.h"

Jerry Wu1ebcce32022-07-26 12:31:14 -040073constexpr char kNanosecondsUnitString[] = "ns";
74constexpr char kMicrosecondsUnitString[] = "us";
75constexpr char kMillisecondsUnitString[] = "ms";
76
Han-Chung Wanga43dd172021-03-11 02:44:05 +080077// TODO(hanchung): Extract the batch size using
Ben Vanik1d60c182022-06-28 12:37:40 -070078// iree_vm_function_lookup_attr_by_name.
Ben Vanike9ae9632022-10-04 08:13:30 -070079IREE_FLAG(int32_t, batch_size, 1,
80 "Number of invocations per iteration, which for dispatch benchmarks "
81 "must match the --iree-hal-benchmark-dispatch-repeat-count value "
82 "used during compilation.");
83IREE_FLAG(int32_t, batch_concurrency, 1,
84 "Number of invocations within a batch that should run concurrently.");
Han-Chung Wanga43dd172021-03-11 02:44:05 +080085
Ben Vanikf65c5cb2023-02-01 11:02:10 -080086IREE_FLAG(string, function, "",
87 "Name of a function contained in the module specified by --module= "
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -070088 "to run. If this is not set, all the exported functions will be "
89 "benchmarked and they are expected to not have input arguments.");
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -080090
CindyLiud9609f22021-09-29 21:11:49 +000091IREE_FLAG(bool, print_statistics, false,
92 "Prints runtime statistics to stderr on exit.");
93
Ben Vanik7958fc92023-01-12 08:45:32 -080094IREE_FLAG_LIST(
Ben Vanikf65c5cb2023-02-01 11:02:10 -080095 string, input,
Ben Vanikebeb5fc2021-04-24 09:40:50 -070096 "An input value or buffer of the format:\n"
97 " [shape]xtype=[value]\n"
98 " 2x2xi32=1 2 3 4\n"
99 "Optionally, brackets may be used to separate the element values:\n"
100 " 2x2xi32=[[1 2][3 4]]\n"
Ben Vanika30c8402022-06-03 19:16:16 -0700101 "Raw binary files can be read to provide buffer contents:\n"
102 " 2x2xi32=@some/file.bin\n"
Ben Vanikaecb7252022-06-12 15:21:20 -0700103 "numpy npy files (from numpy.save) can be read to provide 1+ values:\n"
104 " @some.npy\n"
Ben Vanikebeb5fc2021-04-24 09:40:50 -0700105 "Each occurrence of the flag indicates an input in the order they were\n"
106 "specified on the command line.");
Han-Chung Wang19316be2020-07-17 05:53:02 -0700107
Jerry Wu1ebcce32022-07-26 12:31:14 -0400108static iree_status_t parse_time_unit(iree_string_view_t flag_name,
109 void* storage, iree_string_view_t value) {
110 auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage;
111 auto unit_string = std::string(value.data, value.size);
112 if (unit_string == kMillisecondsUnitString) {
113 *unit = {true, benchmark::kMillisecond};
114 return iree_ok_status();
115 } else if (unit_string == kMicrosecondsUnitString) {
116 *unit = {true, benchmark::kMicrosecond};
117 return iree_ok_status();
118 } else if (unit_string == kNanosecondsUnitString) {
119 *unit = {true, benchmark::kNanosecond};
120 return iree_ok_status();
121 }
122 return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
123 "unsupported time unit");
124}
125static void print_time_unit(iree_string_view_t flag_name, void* storage,
126 FILE* file) {
127 auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage;
128 if (!unit->first) {
129 return;
130 }
131 std::string unit_string;
132 switch (unit->second) {
133 case benchmark::kMillisecond:
134 unit_string = kMillisecondsUnitString;
135 break;
136 case benchmark::kMicrosecond:
137 unit_string = kMicrosecondsUnitString;
138 break;
139 case benchmark::kNanosecond:
140 unit_string = kNanosecondsUnitString;
141 break;
142 default:
143 assert(false && "Unexpected time unit.");
144 }
145 fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data,
146 unit_string.c_str());
147}
148// Time unit to be printed. If the first field is false, each place will use its
149// default time unit.
150static std::pair<bool, benchmark::TimeUnit> FLAG_time_unit = {
151 false, benchmark::kNanosecond};
152IREE_FLAG_CALLBACK(
153 parse_time_unit, print_time_unit, &FLAG_time_unit, time_unit,
154 "The time unit to be printed in the results. Can be 'ms', 'us', or 'ns'.");
155
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -0800156namespace iree {
157namespace {
158
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700159static void BenchmarkGenericFunction(const std::string& benchmark_name,
Ben Vanike9ae9632022-10-04 08:13:30 -0700160 int32_t batch_size,
161 iree_vm_context_t* context,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700162 iree_vm_function_t function,
163 iree_vm_list_t* inputs,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700164 benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700165 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
166 benchmark_name.size());
Ben Vanik11c051a2020-10-21 09:58:22 -0700167 IREE_TRACE_FRAME_MARK();
168
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700169 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700170 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700171 iree_allocator_system(), &outputs));
172
Ben Vanik11c051a2020-10-21 09:58:22 -0700173 // Benchmarking loop.
Han-Chung Wanga43dd172021-03-11 02:44:05 +0800174 while (state.KeepRunningBatch(batch_size)) {
Ben Vanikcc436802023-06-10 08:53:52 -0700175 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
Ben Vanik11c051a2020-10-21 09:58:22 -0700176 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
Ben Vanik89e95302021-10-05 17:05:39 -0700177 IREE_CHECK_OK(iree_vm_invoke(
178 context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr,
179 inputs, outputs.get(), iree_allocator_system()));
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700180 IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0));
Ben Vanikcc436802023-06-10 08:53:52 -0700181 IREE_TRACE_ZONE_END(z1);
Ben Vanik11c051a2020-10-21 09:58:22 -0700182 }
Ben Vanikb4ccbfc2022-08-30 15:43:41 -0700183 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700184
185 IREE_TRACE_ZONE_END(z0);
Ben Vanik11c051a2020-10-21 09:58:22 -0700186}
187
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700188void RegisterGenericBenchmark(const std::string& function_name,
Ben Vanik5a58aa42021-05-07 12:46:29 -0700189 iree_vm_context_t* context,
190 iree_vm_function_t function,
Ben Vanik007109f2022-08-03 07:26:50 -0700191 iree_vm_list_t* inputs) {
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700192 auto benchmark_name = "BM_" + function_name;
Ben Vanike9ae9632022-10-04 08:13:30 -0700193 int32_t batch_size = FLAG_batch_size;
Ben Vanik007109f2022-08-03 07:26:50 -0700194 benchmark::RegisterBenchmark(benchmark_name.c_str(),
Ben Vanike9ae9632022-10-04 08:13:30 -0700195 [=](benchmark::State& state) -> void {
Ben Vanik007109f2022-08-03 07:26:50 -0700196 BenchmarkGenericFunction(
197 benchmark_name, batch_size, context,
198 function, inputs, state);
199 })
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700200 // By default only the main thread is included in CPU time. Include all
201 // the threads instead.
202 ->MeasureProcessCPUTime()
203 // To make single and multi-threaded benchmarks more comparable, use the
204 // wall time to determine how many iterations to run. See
205 // https://github.com/google/benchmark#cpu-timers,
206 ->UseRealTime()
Jerry Wu1ebcce32022-07-26 12:31:14 -0400207 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
208 : benchmark::kMillisecond);
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700209}
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700210
Ben Vanike9ae9632022-10-04 08:13:30 -0700211// Runs up to |batch_size| pipelined invocations in sequence along with
212// concurrency. Example:
213// batch_size=1, concurrency=1:
214// [invocation 0]
215// batch_size=2, concurrency=1:
216// [invocation 0] -> [invocation 1]
217// batch_size=2, concurrency=2:
218// [invocation 0]
219// [invocation 1]
220// batch_size=4, concurrency=2:
221// [invocation 0] -> [invocation 2]
222// [invocation 1] -> [invocation 3]
223static void BenchmarkAsyncFunction(
224 const std::string& benchmark_name, int32_t batch_size,
225 int32_t batch_concurrency, iree_hal_device_t* device,
226 iree_vm_context_t* context, iree_vm_function_t function,
227 iree_vm_list_t* common_inputs, benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700228 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
229 benchmark_name.size());
Ben Vanike9ae9632022-10-04 08:13:30 -0700230 IREE_TRACE_FRAME_MARK();
231 iree_allocator_t host_allocator = iree_allocator_system();
232
233 // Round up batch size to some multiple of concurrency.
234 batch_size = (int32_t)iree_host_align(batch_size, batch_concurrency);
235
236 // Benchmarking loop.
237 while (state.KeepRunningBatch(batch_size)) {
Ben Vanike9ae9632022-10-04 08:13:30 -0700238 state.PauseTiming();
Ben Vanikcc436802023-06-10 08:53:52 -0700239 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
240 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
Ben Vanike9ae9632022-10-04 08:13:30 -0700241
242 IREE_TRACE_ZONE_BEGIN_NAMED(z_begin, "PrepareBatch");
243
244 // Each concurrent track of execution gets its own semaphore.
245 std::vector<vm::ref<iree_hal_semaphore_t>> timeline_semaphores;
246 for (int32_t i = 0; i < batch_concurrency; ++i) {
247 vm::ref<iree_hal_semaphore_t> timeline_semaphore;
248 IREE_CHECK_OK(
249 iree_hal_semaphore_create(device, 0ull, &timeline_semaphore));
250 timeline_semaphores.push_back(std::move(timeline_semaphore));
251 }
252
253 // Preallocate fences and I/O for each invocation.
254 // The same inputs are used for each but we need a unique list to hold the
255 // unique fences. Each fence represents when the invocation has completed.
256 std::vector<vm::ref<iree_hal_fence_t>> invocation_fences;
257 std::vector<vm::ref<iree_vm_list_t>> invocation_inputs;
258 std::vector<vm::ref<iree_vm_list_t>> invocation_outputs;
259 vm::ref<iree_hal_fence_t> completion_fence;
260 IREE_CHECK_OK(iree_hal_fence_create(batch_concurrency, host_allocator,
261 &completion_fence));
262 for (int32_t i = 0; i < batch_size / batch_concurrency; ++i) {
263 for (int32_t j = 0; j < batch_concurrency; ++j) {
264 // Chain each concurrent minibatch to the previous. Note that to start
265 // we wait on nothing and begin executing immediately.
266 vm::ref<iree_hal_fence_t> wait_fence;
267 if (i > 0) {
268 wait_fence = vm::retain_ref(
269 invocation_fences[(i - 1) * batch_concurrency + j]);
270 }
271 uint64_t signal_value = i + 1;
272 vm::ref<iree_hal_fence_t> signal_fence;
273 IREE_CHECK_OK(iree_hal_fence_create_at(timeline_semaphores[j].get(),
274 signal_value, host_allocator,
275 &signal_fence));
276 invocation_fences.push_back(vm::retain_ref(signal_fence));
277
278 // Join the final minibatch on the completion fence.
279 if (i == batch_size / batch_concurrency - 1) {
280 IREE_CHECK_OK(iree_hal_fence_insert(completion_fence.get(),
281 timeline_semaphores[j].get(),
282 signal_value));
283 }
284
285 // Clone common inputs and add the invocation-specific fences.
286 vm::ref<iree_vm_list_t> inputs;
287 IREE_CHECK_OK(
288 iree_vm_list_clone(common_inputs, host_allocator, &inputs));
289 IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), wait_fence));
290 IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), signal_fence));
291 invocation_inputs.push_back(std::move(inputs));
292
293 // Setup empty outputs.
294 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700295 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanike9ae9632022-10-04 08:13:30 -0700296 host_allocator, &outputs));
297 invocation_outputs.push_back(std::move(outputs));
298 }
299 }
300
301 IREE_TRACE_ZONE_END(z_begin);
302
303 state.ResumeTiming();
304 {
305 // TODO(benvanik): replace with async invocations. Today if the invocation
306 // performs any waits this will block on the initial invoke instead of
307 // actually overlapping things.
308 for (int32_t i = 0; i < batch_size; ++i) {
309 IREE_CHECK_OK(
310 iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE,
311 /*policy=*/nullptr, invocation_inputs[i].get(),
312 invocation_outputs[i].get(), host_allocator));
313 }
314 IREE_CHECK_OK(
315 iree_hal_fence_wait(completion_fence.get(), iree_infinite_timeout()));
316 }
317 state.PauseTiming();
318
319 IREE_TRACE_ZONE_BEGIN_NAMED(z_end, "CleanupBatch");
320 for (int32_t i = 0; i < batch_size; ++i) {
321 iree_vm_list_clear(invocation_outputs[i].get());
322 }
323 invocation_fences.clear();
324 invocation_inputs.clear();
325 invocation_outputs.clear();
326 completion_fence.reset();
327 timeline_semaphores.clear();
328 IREE_TRACE_ZONE_END(z_end);
329
Ben Vanikcc436802023-06-10 08:53:52 -0700330 IREE_TRACE_ZONE_END(z1);
Ben Vanike9ae9632022-10-04 08:13:30 -0700331 state.ResumeTiming();
332 }
333 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700334
335 IREE_TRACE_ZONE_END(z0);
Ben Vanike9ae9632022-10-04 08:13:30 -0700336}
337
338void RegisterAsyncBenchmark(const std::string& function_name,
339 iree_hal_device_t* device,
340 iree_vm_context_t* context,
341 iree_vm_function_t function,
342 iree_vm_list_t* inputs) {
343 auto benchmark_name = "BM_" + function_name;
344 int32_t batch_size = FLAG_batch_size;
345 int32_t batch_concurrency = FLAG_batch_concurrency;
346 benchmark::RegisterBenchmark(
347 benchmark_name.c_str(),
348 [=](benchmark::State& state) -> void {
349 BenchmarkAsyncFunction(benchmark_name, batch_size, batch_concurrency,
350 device, context, function, inputs, state);
351 })
352 // By default only the main thread is included in CPU time. Include all
353 // the threads instead.
354 ->MeasureProcessCPUTime()
355 // To make single and multi-threaded benchmarks more comparable, use the
356 // wall time to determine how many iterations to run. See
357 // https://github.com/google/benchmark#cpu-timers,
358 ->UseRealTime()
359 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
360 : benchmark::kMillisecond);
361}
362
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700363static void BenchmarkDispatchFunction(const std::string& benchmark_name,
364 iree_vm_context_t* context,
365 iree_vm_function_t function,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700366 benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700367 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
368 benchmark_name.size());
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700369 IREE_TRACE_FRAME_MARK();
370
371 vm::ref<iree_vm_list_t> inputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700372 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700373 iree_allocator_system(), &inputs));
374 iree_vm_value_t batch_size = iree_vm_value_make_i32(FLAG_batch_size);
375 IREE_CHECK_OK(iree_vm_list_push_value(inputs.get(), &batch_size));
376
377 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700378 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700379 iree_allocator_system(), &outputs));
380
381 // Benchmarking loop.
382 while (state.KeepRunningBatch(FLAG_batch_size)) {
Ben Vanikcc436802023-06-10 08:53:52 -0700383 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700384 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
385 IREE_CHECK_OK(iree_vm_invoke(
386 context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr,
387 inputs.get(), outputs.get(), iree_allocator_system()));
388 IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0));
Ben Vanikcc436802023-06-10 08:53:52 -0700389 IREE_TRACE_ZONE_END(z1);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700390 }
Ben Vanikb4ccbfc2022-08-30 15:43:41 -0700391 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700392
393 IREE_TRACE_ZONE_END(z0);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700394}
395
396void RegisterDispatchBenchmark(const std::string& function_name,
397 iree_vm_context_t* context,
Ben Vanik007109f2022-08-03 07:26:50 -0700398 iree_vm_function_t function) {
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700399 auto benchmark_name = "BM_" + function_name;
Ben Vanik007109f2022-08-03 07:26:50 -0700400 benchmark::RegisterBenchmark(
401 benchmark_name.c_str(),
402 [benchmark_name, context, function](benchmark::State& state) -> void {
403 BenchmarkDispatchFunction(benchmark_name, context, function, state);
404 })
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700405 // By default only the main thread is included in CPU time. Include all
406 // the threads instead.
407 ->MeasureProcessCPUTime()
408 // To make single and multi-threaded benchmarks more comparable, use the
409 // wall time to determine how many iterations to run. See
410 // https://github.com/google/benchmark#cpu-timers,
411 ->UseRealTime()
Jerry Wu1ebcce32022-07-26 12:31:14 -0400412 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
413 : benchmark::kMicrosecond);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700414}
415
Ben Vanik007109f2022-08-03 07:26:50 -0700416// The lifetime of IREEBenchmark should be as long as
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700417// ::benchmark::RunSpecifiedBenchmarks() where the resources are used during
418// benchmarking.
419class IREEBenchmark {
420 public:
Ben Vanik9461d3b2023-04-18 16:39:25 -0700421 IREEBenchmark() { iree_tooling_module_list_initialize(&module_list_); }
Ben Vanikb20b6022021-02-16 12:59:31 -0800422
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700423 ~IREEBenchmark() {
Ben Vanikcc436802023-06-10 08:53:52 -0700424 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::dtor");
Ben Vanik11c051a2020-10-21 09:58:22 -0700425
Ben Vanik007109f2022-08-03 07:26:50 -0700426 // Order matters. Tear down modules first to release resources.
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700427 inputs_.reset();
Ben Vanikc149d612022-11-09 01:07:17 +0000428 context_.reset();
Ben Vanik9461d3b2023-04-18 16:39:25 -0700429 iree_tooling_module_list_reset(&module_list_);
Ben Vanikc149d612022-11-09 01:07:17 +0000430 instance_.reset();
Ben Vanik007109f2022-08-03 07:26:50 -0700431
432 // Tear down device last in order to get accurate statistics.
Ben Vanik2b8438f2022-08-30 16:07:41 -0700433 if (device_allocator_ && FLAG_print_statistics) {
Ben Vanikc149d612022-11-09 01:07:17 +0000434 IREE_IGNORE_ERROR(iree_hal_allocator_statistics_fprint(
435 stderr, device_allocator_.get()));
Ben Vanik007109f2022-08-03 07:26:50 -0700436 }
Ben Vanikc149d612022-11-09 01:07:17 +0000437 device_allocator_.reset();
438 device_.reset();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700439 };
440
Ben Vanikc149d612022-11-09 01:07:17 +0000441 iree_hal_device_t* device() const { return device_.get(); }
Ben Vanik7859d632022-10-24 14:37:28 -0700442
Ben Vanik5a266192021-05-01 15:22:06 -0700443 iree_status_t Register() {
Ben Vanikcc436802023-06-10 08:53:52 -0700444 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Register");
Ben Vanik11c051a2020-10-21 09:58:22 -0700445
Ben Vanik9461d3b2023-04-18 16:39:25 -0700446 if (!instance_ || !device_allocator_ || !context_ || !module_list_.count) {
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700447 IREE_RETURN_IF_ERROR(Init());
448 }
449
Ben Vanikf65c5cb2023-02-01 11:02:10 -0800450 auto function_name = std::string(FLAG_function);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700451 if (!function_name.empty()) {
452 IREE_RETURN_IF_ERROR(RegisterSpecificFunction(function_name));
453 } else {
454 IREE_RETURN_IF_ERROR(RegisterAllExportedFunctions());
455 }
Ben Vanik5a266192021-05-01 15:22:06 -0700456 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700457 }
458
459 private:
Ben Vanik5a266192021-05-01 15:22:06 -0700460 iree_status_t Init() {
Ben Vanikcc436802023-06-10 08:53:52 -0700461 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Init");
Ben Vanik11c051a2020-10-21 09:58:22 -0700462 IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init");
463
Ben Vanik007109f2022-08-03 07:26:50 -0700464 iree_allocator_t host_allocator = iree_allocator_system();
Ben Vanik35bc9a12022-03-09 09:05:58 -0800465 IREE_RETURN_IF_ERROR(
Ben Vanik007109f2022-08-03 07:26:50 -0700466 iree_tooling_create_instance(host_allocator, &instance_));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700467
Ben Vanik9461d3b2023-04-18 16:39:25 -0700468 IREE_RETURN_IF_ERROR(iree_tooling_load_modules_from_flags(
469 instance_.get(), host_allocator, &module_list_));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700470
Ben Vanik007109f2022-08-03 07:26:50 -0700471 IREE_RETURN_IF_ERROR(iree_tooling_create_context_from_flags(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700472 instance_.get(), module_list_.count, module_list_.values,
Ben Vanik007109f2022-08-03 07:26:50 -0700473 /*default_device_uri=*/iree_string_view_empty(), host_allocator,
474 &context_, &device_, &device_allocator_));
Ben Vanik11c051a2020-10-21 09:58:22 -0700475
476 IREE_TRACE_FRAME_MARK_END_NAMED("init");
Ben Vanik5a266192021-05-01 15:22:06 -0700477 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700478 }
479
Ben Vanik5a266192021-05-01 15:22:06 -0700480 iree_status_t RegisterSpecificFunction(const std::string& function_name) {
Ben Vanikcc436802023-06-10 08:53:52 -0700481 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterSpecificFunction");
Ben Vanik11c051a2020-10-21 09:58:22 -0700482
Ben Vanik9461d3b2023-04-18 16:39:25 -0700483 iree_vm_module_t* main_module =
484 iree_tooling_module_list_back(&module_list_);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700485 iree_vm_function_t function;
Ben Vanikb697e762022-06-15 12:07:58 -0700486 IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_name(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700487 main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT,
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700488 iree_string_view_t{function_name.data(), function_name.size()},
489 &function));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700490
Ben Vanik7958fc92023-01-12 08:45:32 -0800491 IREE_CHECK_OK(iree_tooling_parse_to_variant_list(
Ben Vanikf65c5cb2023-02-01 11:02:10 -0800492 device_allocator_.get(), FLAG_input_list().values,
493 FLAG_input_list().count, iree_vm_instance_allocator(instance_.get()),
494 &inputs_));
Ben Vanike9ae9632022-10-04 08:13:30 -0700495
496 iree_string_view_t invocation_model = iree_vm_function_lookup_attr_by_name(
497 &function, IREE_SV("iree.abi.model"));
498 if (iree_string_view_equal(invocation_model, IREE_SV("coarse-fences"))) {
499 // Asynchronous invocation.
Ben Vanikc149d612022-11-09 01:07:17 +0000500 iree::RegisterAsyncBenchmark(function_name, device_.get(), context_.get(),
501 function, inputs_.get());
Ben Vanike9ae9632022-10-04 08:13:30 -0700502 } else {
503 // Synchronous invocation.
Ben Vanikc149d612022-11-09 01:07:17 +0000504 iree::RegisterGenericBenchmark(function_name, context_.get(), function,
Ben Vanike9ae9632022-10-04 08:13:30 -0700505 inputs_.get());
506 }
Ben Vanik5a266192021-05-01 15:22:06 -0700507 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700508 }
509
Ben Vanik5a266192021-05-01 15:22:06 -0700510 iree_status_t RegisterAllExportedFunctions() {
Ben Vanikcc436802023-06-10 08:53:52 -0700511 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterAllExportedFunctions");
Ben Vanik9461d3b2023-04-18 16:39:25 -0700512 iree_vm_module_t* main_module =
513 iree_tooling_module_list_back(&module_list_);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700514 iree_vm_module_signature_t signature =
Ben Vanik9461d3b2023-04-18 16:39:25 -0700515 iree_vm_module_signature(main_module);
Ben Vanik7f3a7e32020-11-14 14:16:07 -0800516 for (iree_host_size_t i = 0; i < signature.export_function_count; ++i) {
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700517 iree_vm_function_t function;
518 IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700519 main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function));
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700520 iree_string_view_t function_name = iree_vm_function_name(&function);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700521
Ben Vanik5a58aa42021-05-07 12:46:29 -0700522 // We run anything with the 'benchmark' attribute.
523 // If the attribute is not present we'll run anything that looks runnable.
Ben Vanik1d60c182022-06-28 12:37:40 -0700524 iree_string_view_t benchmark_type = iree_vm_function_lookup_attr_by_name(
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700525 &function, IREE_SV("iree.benchmark"));
526 if (iree_string_view_equal(benchmark_type, IREE_SV("dispatch"))) {
527 iree::RegisterDispatchBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000528 std::string(function_name.data, function_name.size), context_.get(),
Ben Vanik007109f2022-08-03 07:26:50 -0700529 function);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700530 } else if (iree_string_view_equal(benchmark_type, IREE_SV("entry"))) {
531 iree::RegisterGenericBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000532 std::string(function_name.data, function_name.size), context_.get(),
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700533 function,
Ben Vanik007109f2022-08-03 07:26:50 -0700534 /*inputs=*/nullptr);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700535 } else {
536 // Pick up generic () -> () functions.
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700537 if (iree_string_view_starts_with(function_name,
Ben Vanik5a58aa42021-05-07 12:46:29 -0700538 iree_make_cstring_view("__")) ||
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700539 iree_string_view_find_char(function_name, '$', 0) !=
Ben Vanik5a58aa42021-05-07 12:46:29 -0700540 IREE_STRING_VIEW_NPOS) {
541 // Skip internal or special functions.
542 continue;
543 }
544
Ben Vanike9ae9632022-10-04 08:13:30 -0700545 // Query function information to determine how to run it.
Ben Vanik5a58aa42021-05-07 12:46:29 -0700546 iree_vm_function_signature_t signature =
547 iree_vm_function_signature(&function);
548 iree_host_size_t argument_count = 0;
549 iree_host_size_t result_count = 0;
550 IREE_RETURN_IF_ERROR(iree_vm_function_call_count_arguments_and_results(
551 &signature, &argument_count, &result_count));
Ben Vanike9ae9632022-10-04 08:13:30 -0700552 iree_string_view_t invocation_model =
553 iree_vm_function_lookup_attr_by_name(&function,
554 IREE_SV("iree.abi.model"));
555 if (iree_string_view_equal(invocation_model,
556 IREE_SV("coarse-fences"))) {
557 // Asynchronous invocation with coarse fences. Expect just those.
558 if (argument_count == 2) {
559 // Only functions taking a (wait, signal) fence pair are run.
560 iree::RegisterAsyncBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000561 std::string(function_name.data, function_name.size),
562 device_.get(), context_.get(), function,
Ben Vanike9ae9632022-10-04 08:13:30 -0700563 /*inputs=*/nullptr);
564 }
565 } else {
566 // Basic synchronous invocation.
567 if (argument_count == 0) {
568 // Only functions with no inputs are run (because we can't pass
569 // anything).
570 iree::RegisterGenericBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000571 std::string(function_name.data, function_name.size),
572 context_.get(), function,
Ben Vanike9ae9632022-10-04 08:13:30 -0700573 /*inputs=*/nullptr);
574 }
Ben Vanik5a58aa42021-05-07 12:46:29 -0700575 }
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700576 }
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700577 }
Ben Vanik5a266192021-05-01 15:22:06 -0700578 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700579 }
580
Ben Vanikc149d612022-11-09 01:07:17 +0000581 iree::vm::ref<iree_vm_instance_t> instance_;
582 iree::vm::ref<iree_vm_context_t> context_;
583 iree::vm::ref<iree_hal_device_t> device_;
584 iree::vm::ref<iree_hal_allocator_t> device_allocator_;
Ben Vanik9461d3b2023-04-18 16:39:25 -0700585 iree_tooling_module_list_t module_list_;
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700586 iree::vm::ref<iree_vm_list_t> inputs_;
587};
Ahmed S. Taei7a2f5ea2020-10-06 20:08:53 -0700588} // namespace
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -0800589} // namespace iree
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700590
591int main(int argc, char** argv) {
Ben Vanik14308b12023-06-13 10:22:28 -0700592 IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree-benchmark-module");
Ben Vanik11c051a2020-10-21 09:58:22 -0700593
Ben Vanik1cb2f7a2021-04-26 16:32:53 -0700594 // Pass through flags to benchmark (allowing --help to fall through).
595 iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK |
596 IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP,
597 &argc, &argv);
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700598 ::benchmark::Initialize(&argc, argv);
Ben Vanik1cb2f7a2021-04-26 16:32:53 -0700599
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700600 iree::IREEBenchmark iree_benchmark;
Ben Vanik524c8e72021-05-01 15:48:44 -0700601 iree_status_t status = iree_benchmark.Register();
602 if (!iree_status_is_ok(status)) {
Ben Vanik14308b12023-06-13 10:22:28 -0700603 int exit_code = static_cast<int>(iree_status_code(status));
bjacob1cb92dd2022-09-26 16:21:02 +0000604 printf("%s\n", iree::Status(std::move(status)).ToString().c_str());
Ben Vanik14308b12023-06-13 10:22:28 -0700605 IREE_TRACE_ZONE_END(z0);
606 IREE_TRACE_APP_EXIT(exit_code);
607 return exit_code;
Han-Chung Wangbb9bcd32020-10-07 08:18:05 -0700608 }
Ben Vanik7859d632022-10-24 14:37:28 -0700609 IREE_CHECK_OK(iree_hal_begin_profiling_from_flags(iree_benchmark.device()));
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700610 ::benchmark::RunSpecifiedBenchmarks();
Ben Vanik7859d632022-10-24 14:37:28 -0700611 IREE_CHECK_OK(iree_hal_end_profiling_from_flags(iree_benchmark.device()));
Ben Vanik14308b12023-06-13 10:22:28 -0700612
613 IREE_TRACE_ZONE_END(z0);
614 IREE_TRACE_APP_EXIT(EXIT_SUCCESS);
615 return EXIT_SUCCESS;
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700616}