blob: 1c77bf5374f71fbb6ad82780ae62212cdce07a5f [file] [log] [blame]
// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include <array>
#include <vector>
#include "iree/base/api.h"
#include "iree/testing/benchmark.h"
#include "iree/vm/api.h"
#include "iree/vm/bytecode/module.h"
#include "iree/vm/bytecode/module_benchmark_module_c.h"
namespace {
// Forward declarations for the native import module and its state types.
// Nothing in the visible portion of this file references these names.
// NOTE(review): the struct tags declared first end in `_s`, while the typedefs
// below name `_t` struct tags, so the two pairs refer to different incomplete
// types - confirm whether these declarations are still needed.
struct native_import_module_s;
struct native_import_module_state_s;
typedef struct native_import_module_t native_import_module_t;
typedef struct native_import_module_state_t native_import_module_state_t;
// vm.import private @native_import_module.add_1(%arg0 : i32) -> i32
//
// Reads one int32_t from the start of |args_storage|, adds 1 to it, and writes
// the sum to the start of |rets_storage|. The remaining parameters are not
// used. Always returns an OK status.
static iree_status_t native_import_module_add_1(
    iree_vm_stack_t* stack, iree_vm_native_function_flags_t flags,
    iree_byte_span_t args_storage, iree_byte_span_t rets_storage,
    iree_vm_native_function_target_t target_fn, void* module,
    void* module_state) {
  const int32_t* input = reinterpret_cast<int32_t*>(args_storage.data);
  int32_t* output = reinterpret_cast<int32_t*>(rets_storage.data);
  // ret0 = arg0 + 1
  *output = *input + 1;
  return iree_ok_status();
}
// Export table for the native import module; it lists a single export named
// "add_1".
// NOTE(review): the "0i_i" string is presumably the calling-convention
// signature for (i32) -> i32, matching add_1 - confirm against the VM calling
// convention documentation. The trailing `0, NULL` pair presumably means the
// export carries no attributes - confirm against the descriptor struct.
static const iree_vm_native_export_descriptor_t
native_import_module_exports_[] = {
{iree_make_cstring_view("add_1"), iree_make_cstring_view("0i_i"), 0,
NULL},
};
// Function pointer table for the native import module. Entries are positional:
// entry i implements export i of native_import_module_exports_ (enforced for
// length by the static_assert below). add_1 is stored cast to the shim
// function type with a NULL second field.
static const iree_vm_native_function_ptr_t native_import_module_funcs_[] = {
{(iree_vm_native_function_shim_t)native_import_module_add_1, NULL},
};
// Compile-time guard: both tables must have the same number of entries.
static_assert(IREE_ARRAYSIZE(native_import_module_funcs_) ==
IREE_ARRAYSIZE(native_import_module_exports_),
"function pointer table must be 1:1 with exports");
// Descriptor for the "native_import_module" native module: no attributes,
// dependencies, or imports; one export table and its matching function table.
// Fields are initialized positionally, so the inline labels are documentation
// only and must follow the struct's declaration order.
static const iree_vm_native_module_descriptor_t
native_import_module_descriptor_ = {
/*.name=*/iree_make_cstring_view("native_import_module"),
/*.version=*/0u,
/*.attr_count=*/0,
/*.attrs=*/NULL,
/*.dependency_count=*/0,
/*.dependencies=*/NULL,
/*.import_count=*/0,
/*.imports=*/NULL,
/*.export_count=*/IREE_ARRAYSIZE(native_import_module_exports_),
/*.exports=*/native_import_module_exports_,
/*.function_count=*/IREE_ARRAYSIZE(native_import_module_funcs_),
/*.functions=*/native_import_module_funcs_,
};
// Creates the native import module described by
// native_import_module_descriptor_ and returns it in |out_module|.
//
// A stack-local module interface is initialized with a NULL self pointer and
// handed to iree_vm_native_module_create together with the descriptor,
// |instance|, and |allocator|. Any failure from initialization is returned to
// the caller.
static iree_status_t native_import_module_create(
    iree_vm_instance_t* instance, iree_allocator_t allocator,
    iree_vm_module_t** out_module) {
  // Named `module_interface` rather than `interface`, which some platform
  // headers define as a macro.
  iree_vm_module_t module_interface;
  IREE_RETURN_IF_ERROR(iree_vm_module_initialize(&module_interface, NULL));
  return iree_vm_native_module_create(&module_interface,
                                      &native_import_module_descriptor_,
                                      instance, allocator, out_module);
}
// Benchmarks the exported function |function_name|, optionally passing in
// arguments.
//
// Builds a fresh VM instance, the native import module, the embedded benchmark
// bytecode module, and a context containing both. The function is then invoked
// once per benchmark iteration through the bytecode module's begin_call.
// Failures are handled by IREE_CHECK_OK rather than returned; the only explicit
// return value is an OK status.
//
// |i32_args| is copied into the call's argument storage before every
// invocation, |result_count| int32_t-sized slots are reserved for results, and
// |batch_size| is forwarded to iree_benchmark_keep_running.
static iree_status_t RunFunction(iree_benchmark_state_t* benchmark_state,
                                 iree_string_view_t function_name,
                                 std::vector<int32_t> i32_args,
                                 int result_count, int64_t batch_size = 1) {
  iree_allocator_t host_allocator = iree_allocator_system();

  iree_vm_instance_t* instance = nullptr;
  IREE_CHECK_OK(iree_vm_instance_create(IREE_VM_TYPE_CAPACITY_DEFAULT,
                                        host_allocator, &instance));

  iree_vm_module_t* import_module = nullptr;
  IREE_CHECK_OK(
      native_import_module_create(instance, host_allocator, &import_module));

  // Wrap the embedded module file contents in a byte span for the loader.
  const auto* module_file_toc =
      iree_vm_bytecode_module_benchmark_module_create();
  const iree_const_byte_span_t archive_contents = {
      reinterpret_cast<const uint8_t*>(module_file_toc->data),
      static_cast<iree_host_size_t>(module_file_toc->size)};
  iree_vm_module_t* bytecode_module = nullptr;
  IREE_CHECK_OK(iree_vm_bytecode_module_create(
      instance, archive_contents, iree_allocator_null(), host_allocator,
      &bytecode_module));

  // The import module is listed first, ahead of the bytecode module.
  iree_vm_module_t* modules[] = {import_module, bytecode_module};
  iree_vm_context_t* context = nullptr;
  IREE_CHECK_OK(iree_vm_context_create_with_modules(
      instance, IREE_VM_CONTEXT_FLAG_NONE, IREE_ARRAYSIZE(modules), modules,
      host_allocator, &context));

  // Set up the call with alloca-backed argument and result storage.
  const iree_host_size_t argument_bytes = i32_args.size() * sizeof(int32_t);
  const iree_host_size_t result_bytes = result_count * sizeof(int32_t);
  iree_vm_function_call_t call;
  memset(&call, 0, sizeof(call));
  IREE_CHECK_OK(iree_vm_context_resolve_function(context, function_name,
                                                 &call.function));
  call.arguments =
      iree_make_byte_span(iree_alloca(argument_bytes), argument_bytes);
  call.results = iree_make_byte_span(iree_alloca(result_bytes), result_bytes);

  IREE_VM_INLINE_STACK_INITIALIZE(stack, IREE_VM_INVOCATION_FLAG_NONE,
                                  iree_vm_context_state_resolver(context),
                                  iree_allocator_system());

  int32_t* argument_slots = reinterpret_cast<int32_t*>(call.arguments.data);
  while (iree_benchmark_keep_running(benchmark_state, batch_size)) {
    // Rewrite the arguments before each call.
    iree_host_size_t slot = 0;
    for (int32_t arg_value : i32_args) {
      argument_slots[slot++] = arg_value;
    }
    IREE_CHECK_OK(
        bytecode_module->begin_call(bytecode_module->self, stack, call));
  }

  iree_vm_stack_deinitialize(stack);
  iree_vm_module_release(import_module);
  iree_vm_module_release(bytecode_module);
  iree_vm_context_release(context);
  iree_vm_instance_release(instance);
  return iree_ok_status();
}
// Measures creating and releasing the benchmark bytecode module. One VM
// instance is shared across all iterations; each iteration creates the module
// from the embedded file contents and immediately releases it.
IREE_BENCHMARK_FN(BM_ModuleCreate) {
  iree_allocator_t host_allocator = iree_allocator_system();

  iree_vm_instance_t* instance = nullptr;
  IREE_CHECK_OK(iree_vm_instance_create(IREE_VM_TYPE_CAPACITY_DEFAULT,
                                        host_allocator, &instance));

  while (iree_benchmark_keep_running(benchmark_state, 1)) {
    const auto* module_file_toc =
        iree_vm_bytecode_module_benchmark_module_create();
    const iree_const_byte_span_t archive_contents = {
        reinterpret_cast<const uint8_t*>(module_file_toc->data),
        static_cast<iree_host_size_t>(module_file_toc->size)};
    iree_vm_module_t* created_module = nullptr;
    IREE_CHECK_OK(iree_vm_bytecode_module_create(
        instance, archive_contents, iree_allocator_null(), host_allocator,
        &created_module));

    // Just testing creation and verification here!
    iree_optimization_barrier(created_module);

    iree_vm_module_release(created_module);
  }

  iree_vm_instance_release(instance);
  return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_ModuleCreate);
// Measures allocating and freeing module state for the benchmark bytecode
// module. The instance and module are created once up front; each iteration
// only runs the module's alloc_state/free_state pair.
IREE_BENCHMARK_FN(BM_ModuleCreateState) {
  iree_vm_instance_t* instance = NULL;
  IREE_CHECK_OK(iree_vm_instance_create(IREE_VM_TYPE_CAPACITY_DEFAULT,
                                        iree_allocator_system(), &instance));

  const auto* module_file_toc =
      iree_vm_bytecode_module_benchmark_module_create();
  iree_vm_module_t* module = nullptr;
  IREE_CHECK_OK(iree_vm_bytecode_module_create(
      instance,
      iree_const_byte_span_t{
          reinterpret_cast<const uint8_t*>(module_file_toc->data),
          static_cast<iree_host_size_t>(module_file_toc->size)},
      iree_allocator_null(), iree_allocator_system(), &module));

  while (iree_benchmark_keep_running(benchmark_state, 1)) {
    // Initialize the out pointer and check the status so that a failed
    // allocation is reported instead of handing an indeterminate pointer to
    // free_state.
    iree_vm_module_state_t* module_state = nullptr;
    IREE_CHECK_OK(module->alloc_state(module->self, iree_allocator_system(),
                                      &module_state));

    // Really just testing malloc overhead, though it'll be module-dependent
    // and if we do anything heavyweight on state init it'll show here.
    iree_optimization_barrier(module_state);

    module->free_state(module->self, module_state);
  }

  iree_vm_module_release(module);
  iree_vm_instance_release(instance);
  return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_ModuleCreateState);
// Measures the full module bring-up path: each iteration creates the benchmark
// bytecode module, allocates its state, frees the state, and releases the
// module. Only the VM instance is shared across iterations.
IREE_BENCHMARK_FN(BM_FullModuleInit) {
  iree_vm_instance_t* instance = NULL;
  IREE_CHECK_OK(iree_vm_instance_create(IREE_VM_TYPE_CAPACITY_DEFAULT,
                                        iree_allocator_system(), &instance));

  while (iree_benchmark_keep_running(benchmark_state, 1)) {
    const auto* module_file_toc =
        iree_vm_bytecode_module_benchmark_module_create();
    iree_vm_module_t* module = nullptr;
    IREE_CHECK_OK(iree_vm_bytecode_module_create(
        instance,
        iree_const_byte_span_t{
            reinterpret_cast<const uint8_t*>(module_file_toc->data),
            static_cast<iree_host_size_t>(module_file_toc->size)},
        iree_allocator_null(), iree_allocator_system(), &module));

    // Initialize the out pointer and check the status so that a failed
    // allocation is reported instead of handing an indeterminate pointer to
    // free_state.
    iree_vm_module_state_t* module_state = nullptr;
    IREE_CHECK_OK(module->alloc_state(module->self, iree_allocator_system(),
                                      &module_state));

    iree_optimization_barrier(module_state);

    module->free_state(module->self, module_state);
    iree_vm_module_release(module);
  }

  iree_vm_instance_release(instance);
  return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_FullModuleInit);
// Reference payload for BM_EmptyFuncReference: returns the constant 1 after
// passing it through an optimization barrier.
// NOTE(review): the no-inline attribute and the barrier are presumably there so
// the call is not folded away by the compiler - confirm against the macro
// definitions.
IREE_ATTRIBUTE_NOINLINE static int empty_fn(void) {
int ret = 1;
iree_optimization_barrier(ret);
return ret;
}
// Native baseline for BM_EmptyFuncBytecode: each iteration makes one call to
// empty_fn and passes the result through an optimization barrier.
IREE_BENCHMARK_FN(BM_EmptyFuncReference) {
while (iree_benchmark_keep_running(benchmark_state, 1)) {
int ret = empty_fn();
iree_optimization_barrier(ret);
iree_benchmark_clobber();
}
return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_EmptyFuncReference);
// Runs the bytecode `empty_func` export with no arguments and no results,
// using RunFunction's default batch size. The status from RunFunction is
// returned directly, matching the other bytecode benchmarks in this file.
IREE_BENCHMARK_FN(BM_EmptyFuncBytecode) {
  return RunFunction(
      benchmark_state,
      iree_make_cstring_view("bytecode_module_benchmark.empty_func"), {},
      /*result_count=*/0);
}
IREE_BENCHMARK_REGISTER(BM_EmptyFuncBytecode);
// Reference payload for BM_CallInternalFuncReference: adds |value| to itself
// (doubling it) inside an optimization barrier and returns the result.
IREE_ATTRIBUTE_NOINLINE static int add_fn(int value) {
iree_optimization_barrier(value += value);
return value;
}
// Native baseline for function-call overhead. Each benchmark iteration makes
// ten chained calls to add_fn, written out by hand, with an optimization
// barrier after every call; the batch size of 10 passed to
// iree_benchmark_keep_running matches the ten calls.
IREE_BENCHMARK_FN(BM_CallInternalFuncReference) {
while (iree_benchmark_keep_running(benchmark_state, 10)) {
int value = 1;
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
value = add_fn(value);
iree_optimization_barrier(value);
iree_benchmark_clobber();
}
return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_CallInternalFuncReference);
// Runs the bytecode `call_internal_func` export with a single i32 argument of
// 100 and one result slot, reporting 100 as the batch size per iteration.
IREE_BENCHMARK_FN(BM_CallInternalFuncBytecode) {
  constexpr int kBatchSize = 100;
  const iree_string_view_t function_name =
      iree_make_cstring_view("bytecode_module_benchmark.call_internal_func");
  return RunFunction(benchmark_state, function_name, {kBatchSize},
                     /*result_count=*/1,
                     /*batch_size=*/kBatchSize);
}
IREE_BENCHMARK_REGISTER(BM_CallInternalFuncBytecode);
// Runs the bytecode `call_imported_func` export with a single i32 argument of
// 100 and one result slot, reporting 100 as the batch size per iteration.
IREE_BENCHMARK_FN(BM_CallImportedFuncBytecode) {
  constexpr int kBatchSize = 100;
  const iree_string_view_t function_name =
      iree_make_cstring_view("bytecode_module_benchmark.call_imported_func");
  return RunFunction(benchmark_state, function_name, {kBatchSize},
                     /*result_count=*/1,
                     /*batch_size=*/kBatchSize);
}
IREE_BENCHMARK_REGISTER(BM_CallImportedFuncBytecode);
// Native baseline for BM_LoopSumBytecode. Each benchmark iteration runs a
// counting loop of 100000 steps and reports that count as the batch size.
IREE_BENCHMARK_FN(BM_LoopSumReference) {
static const int batch = 100000;
// Per-step payload: returns its argument after an optimization barrier.
static auto work = +[](int x) {
iree_optimization_barrier(x);
return x;
};
// Counts i from 0 up to |count|, routing the induction variable through
// work() and a barrier on every step; returns the final value of i.
static auto loop = +[](int count) {
int i = 0;
for (; i < count; ++i) {
iree_optimization_barrier(i = work(i));
}
return i;
};
while (iree_benchmark_keep_running(benchmark_state, batch)) {
int ret = loop(batch);
iree_optimization_barrier(ret);
iree_benchmark_clobber();
}
return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_LoopSumReference);
// Runs the bytecode `loop_sum` export with a single i32 argument of 100000 and
// one result slot, reporting 100000 as the batch size per iteration.
IREE_BENCHMARK_FN(BM_LoopSumBytecode) {
  constexpr int kBatchSize = 100000;
  const iree_string_view_t function_name =
      iree_make_cstring_view("bytecode_module_benchmark.loop_sum");
  return RunFunction(benchmark_state, function_name, {kBatchSize},
                     /*result_count=*/1,
                     /*batch_size=*/kBatchSize);
}
IREE_BENCHMARK_REGISTER(BM_LoopSumBytecode);
// Native baseline for BM_BufferReduceBytecode. Each benchmark iteration
// allocates a buffer of 100000 int32_t elements, fills it with 1s, sums it
// element by element, and frees it; 100000 is reported as the batch size.
IREE_BENCHMARK_FN(BM_BufferReduceReference) {
  static const int batch = 100000;
  // Per-element payload: adds buffer[i] to the running sum behind a barrier.
  static auto work = +[](int32_t* buffer, int i, int sum) {
    int new_sum = buffer[i] + sum;
    iree_optimization_barrier(new_sum);
    return new_sum;
  };
  // Reduces the first |count| elements of |buffer|, one work() call each.
  static auto loop = +[](int32_t* buffer, int count) {
    int sum = 0;
    for (int i = 0; i < count; ++i) {
      iree_optimization_barrier(sum = work(buffer, i, sum));
    }
    return sum;
  };
  while (iree_benchmark_keep_running(benchmark_state, batch)) {
    // Allocation and fill stay inside the timed loop, as before. std::vector
    // sizes the buffer from the element type, never yields a null buffer to
    // dereference, and releases the storage at the end of each iteration.
    std::vector<int32_t> buffer(batch, 1);
    int ret = loop(buffer.data(), batch);
    iree_optimization_barrier(ret);
    iree_benchmark_clobber();
  }
  return iree_ok_status();
}
IREE_BENCHMARK_REGISTER(BM_BufferReduceReference);
// Runs the bytecode `buffer_reduce` export with a single i32 argument of
// 100000 and one result slot, reporting 100000 as the batch size per iteration.
IREE_BENCHMARK_FN(BM_BufferReduceBytecode) {
  constexpr int kBatchSize = 100000;
  const iree_string_view_t function_name =
      iree_make_cstring_view("bytecode_module_benchmark.buffer_reduce");
  return RunFunction(benchmark_state, function_name, {kBatchSize},
                     /*result_count=*/1,
                     /*batch_size=*/kBatchSize);
}
IREE_BENCHMARK_REGISTER(BM_BufferReduceBytecode);
// Runs the bytecode `buffer_reduce_unrolled` export with a single i32 argument
// of 100000 and one result slot, reporting 100000 as the batch size.
// NOTE: unrolled 8x, requires %count to be % 8 = 0.
IREE_BENCHMARK_FN(BM_BufferReduceBytecodeUnrolled) {
  constexpr int kBatchSize = 100000;
  const iree_string_view_t function_name = iree_make_cstring_view(
      "bytecode_module_benchmark.buffer_reduce_unrolled");
  return RunFunction(benchmark_state, function_name, {kBatchSize},
                     /*result_count=*/1,
                     /*batch_size=*/kBatchSize);
}
IREE_BENCHMARK_REGISTER(BM_BufferReduceBytecodeUnrolled);
} // namespace