// blob: 06b8c38d9816161974dd492534755d5df860a797 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "iree/base/internal/threading.h"
namespace {
//==============================================================================
// iree_fpu_state_*
//==============================================================================
// Number of float elements in the stack-allocated scratch buffer that each
// scaling helper fills, scales, and sums.
constexpr size_t kElementBufferSize = 2 * 1024;
// Scales a buffer of floats by |scale| and disables autovectorization.
// Will generally be normal scalar floating point math and indicate whether the
// FPU has issues with denormals.
static float UnvectorizedScaleBufferByValue(float scale) {
float buffer[kElementBufferSize];
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
buffer[i] = 1.0f;
}
benchmark::DoNotOptimize(*buffer);
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
buffer[i] *= scale;
benchmark::DoNotOptimize(buffer[i]);
}
benchmark::DoNotOptimize(*buffer);
float sum = 0.0f;
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
sum += buffer[i];
}
return sum;
}
// Scales a buffer of floats by |scale| and allows autovectorization.
// Will generally be SIMD floating point math and indicate whether the vector
// units (NEON, AVX, etc) have issues with denormals.
static float VectorizedScaleBufferByValue(float scale) {
float buffer[kElementBufferSize];
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
buffer[i] = 1.0f;
}
benchmark::DoNotOptimize(*buffer);
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
buffer[i] *= scale;
}
benchmark::DoNotOptimize(*buffer);
float sum = 0.0f;
for (size_t i = 0; i < IREE_ARRAYSIZE(buffer); ++i) {
sum += buffer[i];
}
return sum;
}
// Baseline for the scalar path: scales by 1.0f so every value stays a normal
// float. The FPU state is left exactly as the calling thread has it.
void BM_UnvectorizedNormals(benchmark::State& state) {
  for (auto _ : state) {
    float sum = UnvectorizedScaleBufferByValue(1.0f);
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_UnvectorizedNormals);
// Scalar path with a denormal scale factor and no explicit change to the FPU
// state: measures whatever denormal behavior the thread starts with.
void BM_UnvectorizedDenormals(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;
  for (auto _ : state) {
    float sum = UnvectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_UnvectorizedDenormals);
// Scalar path with a denormal scale factor, timed while
// IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO is pushed as the FPU state.
void BM_UnvectorizedDenormalsFlushedToZero(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;

  // The push happens outside the timing loop; the value it returns is handed
  // back to iree_fpu_state_pop once the loop is done.
  const iree_fpu_state_t previous_state =
      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
  for (auto _ : state) {
    float sum = UnvectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
  iree_fpu_state_pop(previous_state);
}
BENCHMARK(BM_UnvectorizedDenormalsFlushedToZero);
// Scalar path with a denormal scale factor, timed while IREE_FPU_STATE_DEFAULT
// is pushed as the FPU state (i.e. without the flush-to-zero flag requested).
void BM_UnvectorizedDenormalsNotFlushedToZero(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;

  // The push happens outside the timing loop; the value it returns is handed
  // back to iree_fpu_state_pop once the loop is done.
  const iree_fpu_state_t previous_state =
      iree_fpu_state_push(IREE_FPU_STATE_DEFAULT);
  for (auto _ : state) {
    float sum = UnvectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
  iree_fpu_state_pop(previous_state);
}
BENCHMARK(BM_UnvectorizedDenormalsNotFlushedToZero);
// Baseline for the vectorizable path: scales by 1.0f so every value stays a
// normal float. The FPU state is left exactly as the calling thread has it.
void BM_VectorizedNormals(benchmark::State& state) {
  for (auto _ : state) {
    float sum = VectorizedScaleBufferByValue(1.0f);
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_VectorizedNormals);
// Vectorizable path with a denormal scale factor and no explicit change to the
// FPU state: measures whatever denormal behavior the thread starts with.
void BM_VectorizedDenormals(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;
  for (auto _ : state) {
    float sum = VectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_VectorizedDenormals);
// Vectorizable path with a denormal scale factor, timed while
// IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO is pushed as the FPU state.
void BM_VectorizedDenormalsFlushedToZero(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;

  // The push happens outside the timing loop; the value it returns is handed
  // back to iree_fpu_state_pop once the loop is done.
  const iree_fpu_state_t previous_state =
      iree_fpu_state_push(IREE_FPU_STATE_FLAG_FLUSH_DENORMALS_TO_ZERO);
  for (auto _ : state) {
    float sum = VectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
  iree_fpu_state_pop(previous_state);
}
BENCHMARK(BM_VectorizedDenormalsFlushedToZero);
// Vectorizable path with a denormal scale factor, timed while
// IREE_FPU_STATE_DEFAULT is pushed as the FPU state (i.e. without the
// flush-to-zero flag requested).
void BM_VectorizedDenormalsNotFlushedToZero(benchmark::State& state) {
  // 1e-39f is below the smallest normal float, so 1.0f * scale is denormal.
  constexpr float kDenormalScale = 1e-39f;

  // The push happens outside the timing loop; the value it returns is handed
  // back to iree_fpu_state_pop once the loop is done.
  const iree_fpu_state_t previous_state =
      iree_fpu_state_push(IREE_FPU_STATE_DEFAULT);
  for (auto _ : state) {
    float sum = VectorizedScaleBufferByValue(kDenormalScale);
    benchmark::DoNotOptimize(sum);
  }
  iree_fpu_state_pop(previous_state);
}
BENCHMARK(BM_VectorizedDenormalsNotFlushedToZero);
} // namespace