// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//===----------------------------------------------------------------------===//
// iree-benchmark-module: benchmarks public functions in an IREE VM module
//===----------------------------------------------------------------------===//
//
// This runs exported functions using flags specified on the command line.
// Each function is measured independently and the numbers reported will be for
// the full end-to-end CPU and wall times.
//
// From an ML perspective this is an integration benchmark for measuring total
// user-visible latency of model entry points. It is *not* a microbenchmarking
// tool for individual device-side dispatch functions (aka ops aka kernels).
// If interested in the precise time of a particular dispatch then tracy,
// executable_library_benchmark, and platform/vendor tooling (nsight, perf, etc)
// are to be used instead and attaching them to this tool is often useful in
// order to get a large sample set.
//
// By default all functions taking no inputs will be benchmarked. If a function
// takes inputs then the user will need to specify them using --input=
// flags. Depending on the input program the --iree-flow-export-benchmark-funcs
// flag can be passed to the compiler to attempt to wrap each function with
// dummy inputs; however, this will fail in programs with dynamically shaped
// inputs. The workaround for avoiding the need for flags is to provide the
// input program in a form with no inputs from the start.
//
// It's important to remember that IREE is not a BLAS library and is meant to
// run entire programs. It's not generally appropriate to benchmark a model with
// a single matmul, for example, as that's just treating IREE as a BLAS library.
// Note also that user-level ops in a frontend environment don't map to the
// dispatches that IREE executes: IREE is a compiler like any other and does not
// guarantee a source line of code translates into an atomically divisible and
// independently measurable execution command. In other words don't expect to be
// able to benchmark the cost of a broadcasting elementwise tf.add op within a
// model: by the time we are running the program that's fused itself into a
// single machine instruction operating as part of some other ops.
//
// For coarse dispatch testing and triaging it can still be useful to remove
// some of the overheads introduced by whole-program execution and the compiler
// flag --iree-hal-benchmark-dispatch-repeat-count=N is provided to enable
// batching. Whatever N is chosen must then be passed to this tool via
// --batch_size=N so that the benchmark reporting properly reflects the
// batching. As an example --iree-hal-benchmark-dispatch-repeat-count=32 +
// --batch_size=32 will reduce the overheads by 32x. Think of this as a way to
// control the p value in Amdahl's law representing the amount of time spent in
// dispatches relative to the rest of the program. This isn't representative of
// how the full program will run, though, and YMMV. Always verify timings with
// an appropriate device-specific tool before trusting the more generic and
// higher-level numbers from this tool.

#include <array>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "iree/base/api.h"
#include "iree/base/internal/flags.h"
#include "iree/hal/api.h"
#include "iree/modules/hal/types.h"
#include "iree/tooling/context_util.h"
#include "iree/tooling/device_util.h"
#include "iree/tooling/function_io.h"
#include "iree/vm/api.h"
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -080072
// Spellings accepted by (and printed for) the --time_unit flag, listed from
// the coarsest to the finest unit.
constexpr char kMillisecondsUnitString[] = "ms";
constexpr char kMicrosecondsUnitString[] = "us";
constexpr char kNanosecondsUnitString[] = "ns";
76
Han-Chung Wanga43dd172021-03-11 02:44:05 +080077// TODO(hanchung): Extract the batch size using
Ben Vanik1d60c182022-06-28 12:37:40 -070078// iree_vm_function_lookup_attr_by_name.
Ben Vanike9ae9632022-10-04 08:13:30 -070079IREE_FLAG(int32_t, batch_size, 1,
80 "Number of invocations per iteration, which for dispatch benchmarks "
81 "must match the --iree-hal-benchmark-dispatch-repeat-count value "
82 "used during compilation.");
83IREE_FLAG(int32_t, batch_concurrency, 1,
84 "Number of invocations within a batch that should run concurrently.");
Han-Chung Wanga43dd172021-03-11 02:44:05 +080085
Ben Vanikf65c5cb2023-02-01 11:02:10 -080086IREE_FLAG(string, function, "",
87 "Name of a function contained in the module specified by --module= "
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -070088 "to run. If this is not set, all the exported functions will be "
89 "benchmarked and they are expected to not have input arguments.");
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -080090
CindyLiud9609f22021-09-29 21:11:49 +000091IREE_FLAG(bool, print_statistics, false,
92 "Prints runtime statistics to stderr on exit.");
93
Ben Vanik7958fc92023-01-12 08:45:32 -080094IREE_FLAG_LIST(
Ben Vanikf65c5cb2023-02-01 11:02:10 -080095 string, input,
Ben Vanikebeb5fc2021-04-24 09:40:50 -070096 "An input value or buffer of the format:\n"
97 " [shape]xtype=[value]\n"
98 " 2x2xi32=1 2 3 4\n"
99 "Optionally, brackets may be used to separate the element values:\n"
100 " 2x2xi32=[[1 2][3 4]]\n"
Ben Vanika30c8402022-06-03 19:16:16 -0700101 "Raw binary files can be read to provide buffer contents:\n"
102 " 2x2xi32=@some/file.bin\n"
Ben Vanikaecb7252022-06-12 15:21:20 -0700103 "numpy npy files (from numpy.save) can be read to provide 1+ values:\n"
104 " @some.npy\n"
Ben Vanikebeb5fc2021-04-24 09:40:50 -0700105 "Each occurrence of the flag indicates an input in the order they were\n"
106 "specified on the command line.");
Han-Chung Wang19316be2020-07-17 05:53:02 -0700107
Jerry Wu1ebcce32022-07-26 12:31:14 -0400108static iree_status_t parse_time_unit(iree_string_view_t flag_name,
109 void* storage, iree_string_view_t value) {
110 auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage;
111 auto unit_string = std::string(value.data, value.size);
112 if (unit_string == kMillisecondsUnitString) {
113 *unit = {true, benchmark::kMillisecond};
114 return iree_ok_status();
115 } else if (unit_string == kMicrosecondsUnitString) {
116 *unit = {true, benchmark::kMicrosecond};
117 return iree_ok_status();
118 } else if (unit_string == kNanosecondsUnitString) {
119 *unit = {true, benchmark::kNanosecond};
120 return iree_ok_status();
121 }
122 return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
123 "unsupported time unit");
124}
125static void print_time_unit(iree_string_view_t flag_name, void* storage,
126 FILE* file) {
127 auto* unit = (std::pair<bool, benchmark::TimeUnit>*)storage;
128 if (!unit->first) {
129 return;
130 }
131 std::string unit_string;
132 switch (unit->second) {
133 case benchmark::kMillisecond:
134 unit_string = kMillisecondsUnitString;
135 break;
136 case benchmark::kMicrosecond:
137 unit_string = kMicrosecondsUnitString;
138 break;
139 case benchmark::kNanosecond:
140 unit_string = kNanosecondsUnitString;
141 break;
142 default:
143 assert(false && "Unexpected time unit.");
144 }
145 fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data,
146 unit_string.c_str());
147}
148// Time unit to be printed. If the first field is false, each place will use its
149// default time unit.
150static std::pair<bool, benchmark::TimeUnit> FLAG_time_unit = {
151 false, benchmark::kNanosecond};
152IREE_FLAG_CALLBACK(
153 parse_time_unit, print_time_unit, &FLAG_time_unit, time_unit,
154 "The time unit to be printed in the results. Can be 'ms', 'us', or 'ns'.");
155
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -0800156namespace iree {
157namespace {
158
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700159static void BenchmarkGenericFunction(const std::string& benchmark_name,
Ben Vanike9ae9632022-10-04 08:13:30 -0700160 int32_t batch_size,
Ben Vanik82be9252023-08-25 11:12:18 -0700161 iree_hal_device_t* device,
Ben Vanike9ae9632022-10-04 08:13:30 -0700162 iree_vm_context_t* context,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700163 iree_vm_function_t function,
164 iree_vm_list_t* inputs,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700165 benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700166 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
167 benchmark_name.size());
Ben Vanik11c051a2020-10-21 09:58:22 -0700168 IREE_TRACE_FRAME_MARK();
169
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700170 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700171 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700172 iree_allocator_system(), &outputs));
173
Ben Vanik11c051a2020-10-21 09:58:22 -0700174 // Benchmarking loop.
Han-Chung Wanga43dd172021-03-11 02:44:05 +0800175 while (state.KeepRunningBatch(batch_size)) {
Ben Vanikcc436802023-06-10 08:53:52 -0700176 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
Ben Vanik11c051a2020-10-21 09:58:22 -0700177 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
Ben Vanik89e95302021-10-05 17:05:39 -0700178 IREE_CHECK_OK(iree_vm_invoke(
179 context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr,
180 inputs, outputs.get(), iree_allocator_system()));
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700181 IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0));
Ben Vanikcc436802023-06-10 08:53:52 -0700182 IREE_TRACE_ZONE_END(z1);
Ben Vanik82be9252023-08-25 11:12:18 -0700183 if (device) {
184 state.PauseTiming();
185 IREE_CHECK_OK(iree_hal_device_profiling_flush(device));
186 state.ResumeTiming();
187 }
Ben Vanik11c051a2020-10-21 09:58:22 -0700188 }
Ben Vanikb4ccbfc2022-08-30 15:43:41 -0700189 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700190
191 IREE_TRACE_ZONE_END(z0);
Ben Vanik11c051a2020-10-21 09:58:22 -0700192}
193
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700194void RegisterGenericBenchmark(const std::string& function_name,
Ben Vanik82be9252023-08-25 11:12:18 -0700195 iree_hal_device_t* device,
Ben Vanik5a58aa42021-05-07 12:46:29 -0700196 iree_vm_context_t* context,
197 iree_vm_function_t function,
Ben Vanik007109f2022-08-03 07:26:50 -0700198 iree_vm_list_t* inputs) {
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700199 auto benchmark_name = "BM_" + function_name;
Ben Vanike9ae9632022-10-04 08:13:30 -0700200 int32_t batch_size = FLAG_batch_size;
Ben Vanik007109f2022-08-03 07:26:50 -0700201 benchmark::RegisterBenchmark(benchmark_name.c_str(),
Ben Vanike9ae9632022-10-04 08:13:30 -0700202 [=](benchmark::State& state) -> void {
Ben Vanik007109f2022-08-03 07:26:50 -0700203 BenchmarkGenericFunction(
Ben Vanik82be9252023-08-25 11:12:18 -0700204 benchmark_name, batch_size, device,
205 context, function, inputs, state);
Ben Vanik007109f2022-08-03 07:26:50 -0700206 })
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700207 // By default only the main thread is included in CPU time. Include all
208 // the threads instead.
209 ->MeasureProcessCPUTime()
210 // To make single and multi-threaded benchmarks more comparable, use the
211 // wall time to determine how many iterations to run. See
212 // https://github.com/google/benchmark#cpu-timers,
213 ->UseRealTime()
Jerry Wu1ebcce32022-07-26 12:31:14 -0400214 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
215 : benchmark::kMillisecond);
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700216}
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700217
Ben Vanike9ae9632022-10-04 08:13:30 -0700218// Runs up to |batch_size| pipelined invocations in sequence along with
219// concurrency. Example:
220// batch_size=1, concurrency=1:
221// [invocation 0]
222// batch_size=2, concurrency=1:
223// [invocation 0] -> [invocation 1]
224// batch_size=2, concurrency=2:
225// [invocation 0]
226// [invocation 1]
227// batch_size=4, concurrency=2:
228// [invocation 0] -> [invocation 2]
229// [invocation 1] -> [invocation 3]
230static void BenchmarkAsyncFunction(
231 const std::string& benchmark_name, int32_t batch_size,
232 int32_t batch_concurrency, iree_hal_device_t* device,
233 iree_vm_context_t* context, iree_vm_function_t function,
234 iree_vm_list_t* common_inputs, benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700235 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
236 benchmark_name.size());
Ben Vanike9ae9632022-10-04 08:13:30 -0700237 IREE_TRACE_FRAME_MARK();
238 iree_allocator_t host_allocator = iree_allocator_system();
239
240 // Round up batch size to some multiple of concurrency.
241 batch_size = (int32_t)iree_host_align(batch_size, batch_concurrency);
242
243 // Benchmarking loop.
244 while (state.KeepRunningBatch(batch_size)) {
Ben Vanike9ae9632022-10-04 08:13:30 -0700245 state.PauseTiming();
Ben Vanikcc436802023-06-10 08:53:52 -0700246 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
247 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
Ben Vanike9ae9632022-10-04 08:13:30 -0700248
249 IREE_TRACE_ZONE_BEGIN_NAMED(z_begin, "PrepareBatch");
250
251 // Each concurrent track of execution gets its own semaphore.
252 std::vector<vm::ref<iree_hal_semaphore_t>> timeline_semaphores;
253 for (int32_t i = 0; i < batch_concurrency; ++i) {
254 vm::ref<iree_hal_semaphore_t> timeline_semaphore;
Ben Vanika28f76f2024-08-06 15:04:15 -0700255 IREE_CHECK_OK(iree_hal_semaphore_create(
256 device, 0ull, IREE_HAL_SEMAPHORE_FLAG_NONE, &timeline_semaphore));
Ben Vanike9ae9632022-10-04 08:13:30 -0700257 timeline_semaphores.push_back(std::move(timeline_semaphore));
258 }
259
260 // Preallocate fences and I/O for each invocation.
261 // The same inputs are used for each but we need a unique list to hold the
262 // unique fences. Each fence represents when the invocation has completed.
263 std::vector<vm::ref<iree_hal_fence_t>> invocation_fences;
264 std::vector<vm::ref<iree_vm_list_t>> invocation_inputs;
265 std::vector<vm::ref<iree_vm_list_t>> invocation_outputs;
266 vm::ref<iree_hal_fence_t> completion_fence;
267 IREE_CHECK_OK(iree_hal_fence_create(batch_concurrency, host_allocator,
268 &completion_fence));
269 for (int32_t i = 0; i < batch_size / batch_concurrency; ++i) {
270 for (int32_t j = 0; j < batch_concurrency; ++j) {
271 // Chain each concurrent minibatch to the previous. Note that to start
272 // we wait on nothing and begin executing immediately.
273 vm::ref<iree_hal_fence_t> wait_fence;
274 if (i > 0) {
275 wait_fence = vm::retain_ref(
276 invocation_fences[(i - 1) * batch_concurrency + j]);
277 }
278 uint64_t signal_value = i + 1;
279 vm::ref<iree_hal_fence_t> signal_fence;
280 IREE_CHECK_OK(iree_hal_fence_create_at(timeline_semaphores[j].get(),
281 signal_value, host_allocator,
282 &signal_fence));
283 invocation_fences.push_back(vm::retain_ref(signal_fence));
284
285 // Join the final minibatch on the completion fence.
286 if (i == batch_size / batch_concurrency - 1) {
287 IREE_CHECK_OK(iree_hal_fence_insert(completion_fence.get(),
288 timeline_semaphores[j].get(),
289 signal_value));
290 }
291
292 // Clone common inputs and add the invocation-specific fences.
293 vm::ref<iree_vm_list_t> inputs;
294 IREE_CHECK_OK(
295 iree_vm_list_clone(common_inputs, host_allocator, &inputs));
296 IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), wait_fence));
297 IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), signal_fence));
298 invocation_inputs.push_back(std::move(inputs));
299
300 // Setup empty outputs.
301 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700302 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanike9ae9632022-10-04 08:13:30 -0700303 host_allocator, &outputs));
304 invocation_outputs.push_back(std::move(outputs));
305 }
306 }
307
308 IREE_TRACE_ZONE_END(z_begin);
309
310 state.ResumeTiming();
311 {
312 // TODO(benvanik): replace with async invocations. Today if the invocation
313 // performs any waits this will block on the initial invoke instead of
314 // actually overlapping things.
315 for (int32_t i = 0; i < batch_size; ++i) {
316 IREE_CHECK_OK(
317 iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE,
318 /*policy=*/nullptr, invocation_inputs[i].get(),
319 invocation_outputs[i].get(), host_allocator));
320 }
321 IREE_CHECK_OK(
322 iree_hal_fence_wait(completion_fence.get(), iree_infinite_timeout()));
323 }
324 state.PauseTiming();
325
326 IREE_TRACE_ZONE_BEGIN_NAMED(z_end, "CleanupBatch");
327 for (int32_t i = 0; i < batch_size; ++i) {
328 iree_vm_list_clear(invocation_outputs[i].get());
329 }
330 invocation_fences.clear();
331 invocation_inputs.clear();
332 invocation_outputs.clear();
333 completion_fence.reset();
334 timeline_semaphores.clear();
335 IREE_TRACE_ZONE_END(z_end);
336
Ben Vanikcc436802023-06-10 08:53:52 -0700337 IREE_TRACE_ZONE_END(z1);
Ben Vanik82be9252023-08-25 11:12:18 -0700338 if (device) {
339 IREE_CHECK_OK(iree_hal_device_profiling_flush(device));
340 }
Ben Vanike9ae9632022-10-04 08:13:30 -0700341 state.ResumeTiming();
342 }
343 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700344
345 IREE_TRACE_ZONE_END(z0);
Ben Vanike9ae9632022-10-04 08:13:30 -0700346}
347
348void RegisterAsyncBenchmark(const std::string& function_name,
349 iree_hal_device_t* device,
350 iree_vm_context_t* context,
351 iree_vm_function_t function,
352 iree_vm_list_t* inputs) {
353 auto benchmark_name = "BM_" + function_name;
354 int32_t batch_size = FLAG_batch_size;
355 int32_t batch_concurrency = FLAG_batch_concurrency;
356 benchmark::RegisterBenchmark(
357 benchmark_name.c_str(),
358 [=](benchmark::State& state) -> void {
359 BenchmarkAsyncFunction(benchmark_name, batch_size, batch_concurrency,
360 device, context, function, inputs, state);
361 })
362 // By default only the main thread is included in CPU time. Include all
363 // the threads instead.
364 ->MeasureProcessCPUTime()
365 // To make single and multi-threaded benchmarks more comparable, use the
366 // wall time to determine how many iterations to run. See
367 // https://github.com/google/benchmark#cpu-timers,
368 ->UseRealTime()
369 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
370 : benchmark::kMillisecond);
371}
372
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700373static void BenchmarkDispatchFunction(const std::string& benchmark_name,
374 iree_vm_context_t* context,
375 iree_vm_function_t function,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700376 benchmark::State& state) {
Ben Vanikcc436802023-06-10 08:53:52 -0700377 IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, benchmark_name.data(),
378 benchmark_name.size());
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700379 IREE_TRACE_FRAME_MARK();
380
381 vm::ref<iree_vm_list_t> inputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700382 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700383 iree_allocator_system(), &inputs));
384 iree_vm_value_t batch_size = iree_vm_value_make_i32(FLAG_batch_size);
385 IREE_CHECK_OK(iree_vm_list_push_value(inputs.get(), &batch_size));
386
387 vm::ref<iree_vm_list_t> outputs;
Ben Vanik09630d62023-04-13 14:21:40 -0700388 IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 16,
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700389 iree_allocator_system(), &outputs));
390
391 // Benchmarking loop.
392 while (state.KeepRunningBatch(FLAG_batch_size)) {
Ben Vanikcc436802023-06-10 08:53:52 -0700393 IREE_TRACE_ZONE_BEGIN_NAMED(z1, "BenchmarkIteration");
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700394 IREE_TRACE_FRAME_MARK_NAMED("Iteration");
395 IREE_CHECK_OK(iree_vm_invoke(
396 context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/nullptr,
397 inputs.get(), outputs.get(), iree_allocator_system()));
398 IREE_CHECK_OK(iree_vm_list_resize(outputs.get(), 0));
Ben Vanikcc436802023-06-10 08:53:52 -0700399 IREE_TRACE_ZONE_END(z1);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700400 }
Ben Vanikb4ccbfc2022-08-30 15:43:41 -0700401 state.SetItemsProcessed(state.iterations());
Ben Vanikcc436802023-06-10 08:53:52 -0700402
403 IREE_TRACE_ZONE_END(z0);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700404}
405
406void RegisterDispatchBenchmark(const std::string& function_name,
407 iree_vm_context_t* context,
Ben Vanik007109f2022-08-03 07:26:50 -0700408 iree_vm_function_t function) {
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700409 auto benchmark_name = "BM_" + function_name;
Ben Vanik007109f2022-08-03 07:26:50 -0700410 benchmark::RegisterBenchmark(
411 benchmark_name.c_str(),
412 [benchmark_name, context, function](benchmark::State& state) -> void {
413 BenchmarkDispatchFunction(benchmark_name, context, function, state);
414 })
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700415 // By default only the main thread is included in CPU time. Include all
416 // the threads instead.
417 ->MeasureProcessCPUTime()
418 // To make single and multi-threaded benchmarks more comparable, use the
419 // wall time to determine how many iterations to run. See
420 // https://github.com/google/benchmark#cpu-timers,
421 ->UseRealTime()
Jerry Wu1ebcce32022-07-26 12:31:14 -0400422 ->Unit(FLAG_time_unit.first ? FLAG_time_unit.second
423 : benchmark::kMicrosecond);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700424}
425
Ben Vanik007109f2022-08-03 07:26:50 -0700426// The lifetime of IREEBenchmark should be as long as
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700427// ::benchmark::RunSpecifiedBenchmarks() where the resources are used during
428// benchmarking.
429class IREEBenchmark {
430 public:
Ben Vanik9461d3b2023-04-18 16:39:25 -0700431 IREEBenchmark() { iree_tooling_module_list_initialize(&module_list_); }
Ben Vanikb20b6022021-02-16 12:59:31 -0800432
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700433 ~IREEBenchmark() {
Ben Vanikcc436802023-06-10 08:53:52 -0700434 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::dtor");
Ben Vanik11c051a2020-10-21 09:58:22 -0700435
Ben Vanik007109f2022-08-03 07:26:50 -0700436 // Order matters. Tear down modules first to release resources.
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700437 inputs_.reset();
Ben Vanikc149d612022-11-09 01:07:17 +0000438 context_.reset();
Ben Vanik9461d3b2023-04-18 16:39:25 -0700439 iree_tooling_module_list_reset(&module_list_);
Ben Vanikc149d612022-11-09 01:07:17 +0000440 instance_.reset();
Ben Vanik007109f2022-08-03 07:26:50 -0700441
442 // Tear down device last in order to get accurate statistics.
Ben Vanik2b8438f2022-08-30 16:07:41 -0700443 if (device_allocator_ && FLAG_print_statistics) {
Ben Vanikc149d612022-11-09 01:07:17 +0000444 IREE_IGNORE_ERROR(iree_hal_allocator_statistics_fprint(
445 stderr, device_allocator_.get()));
Ben Vanik007109f2022-08-03 07:26:50 -0700446 }
Ben Vanikc149d612022-11-09 01:07:17 +0000447 device_allocator_.reset();
448 device_.reset();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700449 };
450
Ben Vanikc149d612022-11-09 01:07:17 +0000451 iree_hal_device_t* device() const { return device_.get(); }
Ben Vanik7859d632022-10-24 14:37:28 -0700452
Ben Vanik5a266192021-05-01 15:22:06 -0700453 iree_status_t Register() {
Ben Vanikcc436802023-06-10 08:53:52 -0700454 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Register");
Ben Vanik11c051a2020-10-21 09:58:22 -0700455
Ben Vanik9461d3b2023-04-18 16:39:25 -0700456 if (!instance_ || !device_allocator_ || !context_ || !module_list_.count) {
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700457 IREE_RETURN_IF_ERROR(Init());
458 }
459
Ben Vanikf65c5cb2023-02-01 11:02:10 -0800460 auto function_name = std::string(FLAG_function);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700461 if (!function_name.empty()) {
462 IREE_RETURN_IF_ERROR(RegisterSpecificFunction(function_name));
463 } else {
464 IREE_RETURN_IF_ERROR(RegisterAllExportedFunctions());
465 }
Ben Vanik5a266192021-05-01 15:22:06 -0700466 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700467 }
468
469 private:
Ben Vanik5a266192021-05-01 15:22:06 -0700470 iree_status_t Init() {
Ben Vanikcc436802023-06-10 08:53:52 -0700471 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::Init");
Ben Vanik11c051a2020-10-21 09:58:22 -0700472 IREE_TRACE_FRAME_MARK_BEGIN_NAMED("init");
473
Ben Vanik007109f2022-08-03 07:26:50 -0700474 iree_allocator_t host_allocator = iree_allocator_system();
Ben Vanik35bc9a12022-03-09 09:05:58 -0800475 IREE_RETURN_IF_ERROR(
Ben Vanik007109f2022-08-03 07:26:50 -0700476 iree_tooling_create_instance(host_allocator, &instance_));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700477
Ben Vanik9461d3b2023-04-18 16:39:25 -0700478 IREE_RETURN_IF_ERROR(iree_tooling_load_modules_from_flags(
479 instance_.get(), host_allocator, &module_list_));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700480
Ben Vanik007109f2022-08-03 07:26:50 -0700481 IREE_RETURN_IF_ERROR(iree_tooling_create_context_from_flags(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700482 instance_.get(), module_list_.count, module_list_.values,
Ben Vanik007109f2022-08-03 07:26:50 -0700483 /*default_device_uri=*/iree_string_view_empty(), host_allocator,
484 &context_, &device_, &device_allocator_));
Ben Vanik11c051a2020-10-21 09:58:22 -0700485
486 IREE_TRACE_FRAME_MARK_END_NAMED("init");
Ben Vanik5a266192021-05-01 15:22:06 -0700487 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700488 }
489
Ben Vanik5a266192021-05-01 15:22:06 -0700490 iree_status_t RegisterSpecificFunction(const std::string& function_name) {
Ben Vanikcc436802023-06-10 08:53:52 -0700491 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterSpecificFunction");
Ben Vanik11c051a2020-10-21 09:58:22 -0700492
Ben Vanik9461d3b2023-04-18 16:39:25 -0700493 iree_vm_module_t* main_module =
494 iree_tooling_module_list_back(&module_list_);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700495 iree_vm_function_t function;
Ben Vanikb697e762022-06-15 12:07:58 -0700496 IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_name(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700497 main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT,
Scott Todd60b07642023-06-15 09:41:01 -0700498 iree_string_view_t{function_name.data(),
499 (iree_host_size_t)function_name.size()},
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700500 &function));
Ben Vanik30901f52024-02-08 11:23:21 -0800501 iree_vm_function_signature_t signature =
502 iree_vm_function_signature(&function);
503 iree_string_view_t arguments_cconv, results_cconv;
504 IREE_RETURN_IF_ERROR(iree_vm_function_call_get_cconv_fragments(
505 &signature, &arguments_cconv, &results_cconv));
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700506
Ben Vanik30901f52024-02-08 11:23:21 -0800507 IREE_CHECK_OK(iree_tooling_parse_variants(
508 arguments_cconv, FLAG_input_list(), device_.get(),
509 device_allocator_.get(), iree_vm_instance_allocator(instance_.get()),
Ben Vanikf65c5cb2023-02-01 11:02:10 -0800510 &inputs_));
Ben Vanike9ae9632022-10-04 08:13:30 -0700511
512 iree_string_view_t invocation_model = iree_vm_function_lookup_attr_by_name(
513 &function, IREE_SV("iree.abi.model"));
514 if (iree_string_view_equal(invocation_model, IREE_SV("coarse-fences"))) {
515 // Asynchronous invocation.
Ben Vanikc149d612022-11-09 01:07:17 +0000516 iree::RegisterAsyncBenchmark(function_name, device_.get(), context_.get(),
517 function, inputs_.get());
Ben Vanike9ae9632022-10-04 08:13:30 -0700518 } else {
519 // Synchronous invocation.
Ben Vanik82be9252023-08-25 11:12:18 -0700520 iree::RegisterGenericBenchmark(function_name, device_.get(),
521 context_.get(), function, inputs_.get());
Ben Vanike9ae9632022-10-04 08:13:30 -0700522 }
Ben Vanik5a266192021-05-01 15:22:06 -0700523 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700524 }
525
Ben Vanik5a266192021-05-01 15:22:06 -0700526 iree_status_t RegisterAllExportedFunctions() {
Ben Vanikcc436802023-06-10 08:53:52 -0700527 IREE_TRACE_SCOPE_NAMED("IREEBenchmark::RegisterAllExportedFunctions");
Ben Vanik9461d3b2023-04-18 16:39:25 -0700528 iree_vm_module_t* main_module =
529 iree_tooling_module_list_back(&module_list_);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700530 iree_vm_module_signature_t signature =
Ben Vanik9461d3b2023-04-18 16:39:25 -0700531 iree_vm_module_signature(main_module);
Ben Vanik7f3a7e32020-11-14 14:16:07 -0800532 for (iree_host_size_t i = 0; i < signature.export_function_count; ++i) {
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700533 iree_vm_function_t function;
534 IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal(
Ben Vanik9461d3b2023-04-18 16:39:25 -0700535 main_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function));
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700536 iree_string_view_t function_name = iree_vm_function_name(&function);
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700537
Ben Vanik5a58aa42021-05-07 12:46:29 -0700538 // We run anything with the 'benchmark' attribute.
539 // If the attribute is not present we'll run anything that looks runnable.
Ben Vanik1d60c182022-06-28 12:37:40 -0700540 iree_string_view_t benchmark_type = iree_vm_function_lookup_attr_by_name(
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700541 &function, IREE_SV("iree.benchmark"));
542 if (iree_string_view_equal(benchmark_type, IREE_SV("dispatch"))) {
543 iree::RegisterDispatchBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000544 std::string(function_name.data, function_name.size), context_.get(),
Ben Vanik007109f2022-08-03 07:26:50 -0700545 function);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700546 } else if (iree_string_view_equal(benchmark_type, IREE_SV("entry"))) {
547 iree::RegisterGenericBenchmark(
Ben Vanik82be9252023-08-25 11:12:18 -0700548 std::string(function_name.data, function_name.size), device_.get(),
549 context_.get(), function,
Ben Vanik007109f2022-08-03 07:26:50 -0700550 /*inputs=*/nullptr);
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700551 } else {
552 // Pick up generic () -> () functions.
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700553 if (iree_string_view_starts_with(function_name,
Ben Vanik5a58aa42021-05-07 12:46:29 -0700554 iree_make_cstring_view("__")) ||
Ben Vanik6c4dd5b2021-10-05 15:29:23 -0700555 iree_string_view_find_char(function_name, '$', 0) !=
Ben Vanik5a58aa42021-05-07 12:46:29 -0700556 IREE_STRING_VIEW_NPOS) {
557 // Skip internal or special functions.
558 continue;
559 }
560
Ben Vanike9ae9632022-10-04 08:13:30 -0700561 // Query function information to determine how to run it.
Ben Vanik5a58aa42021-05-07 12:46:29 -0700562 iree_vm_function_signature_t signature =
563 iree_vm_function_signature(&function);
564 iree_host_size_t argument_count = 0;
565 iree_host_size_t result_count = 0;
566 IREE_RETURN_IF_ERROR(iree_vm_function_call_count_arguments_and_results(
567 &signature, &argument_count, &result_count));
Ben Vanike9ae9632022-10-04 08:13:30 -0700568 iree_string_view_t invocation_model =
569 iree_vm_function_lookup_attr_by_name(&function,
570 IREE_SV("iree.abi.model"));
571 if (iree_string_view_equal(invocation_model,
572 IREE_SV("coarse-fences"))) {
573 // Asynchronous invocation with coarse fences. Expect just those.
574 if (argument_count == 2) {
575 // Only functions taking a (wait, signal) fence pair are run.
576 iree::RegisterAsyncBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000577 std::string(function_name.data, function_name.size),
578 device_.get(), context_.get(), function,
Ben Vanike9ae9632022-10-04 08:13:30 -0700579 /*inputs=*/nullptr);
580 }
581 } else {
582 // Basic synchronous invocation.
583 if (argument_count == 0) {
584 // Only functions with no inputs are run (because we can't pass
585 // anything).
586 iree::RegisterGenericBenchmark(
Ben Vanikc149d612022-11-09 01:07:17 +0000587 std::string(function_name.data, function_name.size),
Ben Vanik82be9252023-08-25 11:12:18 -0700588 device_.get(), context_.get(), function,
Ben Vanike9ae9632022-10-04 08:13:30 -0700589 /*inputs=*/nullptr);
590 }
Ben Vanik5a58aa42021-05-07 12:46:29 -0700591 }
Ben Vanikf8e11fd2022-04-12 09:43:41 -0700592 }
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700593 }
Ben Vanik5a266192021-05-01 15:22:06 -0700594 return iree_ok_status();
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700595 }
596
Ben Vanikc149d612022-11-09 01:07:17 +0000597 iree::vm::ref<iree_vm_instance_t> instance_;
598 iree::vm::ref<iree_vm_context_t> context_;
599 iree::vm::ref<iree_hal_device_t> device_;
600 iree::vm::ref<iree_hal_allocator_t> device_allocator_;
Ben Vanik9461d3b2023-04-18 16:39:25 -0700601 iree_tooling_module_list_t module_list_;
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700602 iree::vm::ref<iree_vm_list_t> inputs_;
603};
Ahmed S. Taei7a2f5ea2020-10-06 20:08:53 -0700604} // namespace
Geoffrey Martin-Noble34190262020-02-06 10:43:58 -0800605} // namespace iree
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700606
607int main(int argc, char** argv) {
Ben Vanik7ed4f4b2023-06-14 13:33:54 -0700608 IREE_TRACE_APP_ENTER();
Ben Vanik14308b12023-06-13 10:22:28 -0700609 IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree-benchmark-module");
Ben Vanik11c051a2020-10-21 09:58:22 -0700610
Ben Vanik1cb2f7a2021-04-26 16:32:53 -0700611 // Pass through flags to benchmark (allowing --help to fall through).
Stella Laurenzoa2733b02023-11-08 13:22:50 -0800612 iree_flags_set_usage(
613 "iree-benchmark-module",
614 "Benchmarks a function within a compiled IREE module and handles I/O\n"
615 "parsing. Modules can be provided by file path (`--module=file.vmfb`)\n"
616 "or read from stdin (`--module=-`) and the function to execute\n"
617 "matches the original name provided to the compiler\n"
618 "(`--function=foo` for `func.func @foo`).\n");
Ben Vanik1cb2f7a2021-04-26 16:32:53 -0700619 iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK |
620 IREE_FLAGS_PARSE_MODE_CONTINUE_AFTER_HELP,
621 &argc, &argv);
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700622 ::benchmark::Initialize(&argc, argv);
Ben Vanik1cb2f7a2021-04-26 16:32:53 -0700623
Han-Chung Wang00aa2fc2020-10-12 02:05:39 -0700624 iree::IREEBenchmark iree_benchmark;
Ben Vanik524c8e72021-05-01 15:48:44 -0700625 iree_status_t status = iree_benchmark.Register();
626 if (!iree_status_is_ok(status)) {
Ben Vanik14308b12023-06-13 10:22:28 -0700627 int exit_code = static_cast<int>(iree_status_code(status));
bjacob1cb92dd2022-09-26 16:21:02 +0000628 printf("%s\n", iree::Status(std::move(status)).ToString().c_str());
Ben Vanik14308b12023-06-13 10:22:28 -0700629 IREE_TRACE_ZONE_END(z0);
630 IREE_TRACE_APP_EXIT(exit_code);
631 return exit_code;
Han-Chung Wangbb9bcd32020-10-07 08:18:05 -0700632 }
Ben Vanik7859d632022-10-24 14:37:28 -0700633 IREE_CHECK_OK(iree_hal_begin_profiling_from_flags(iree_benchmark.device()));
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700634 ::benchmark::RunSpecifiedBenchmarks();
Ben Vanik7859d632022-10-24 14:37:28 -0700635 IREE_CHECK_OK(iree_hal_end_profiling_from_flags(iree_benchmark.device()));
Ben Vanik14308b12023-06-13 10:22:28 -0700636
637 IREE_TRACE_ZONE_END(z0);
638 IREE_TRACE_APP_EXIT(EXIT_SUCCESS);
639 return EXIT_SUCCESS;
Ahmed S. Taeif1678df2020-08-26 20:38:50 -0700640}