blob: c59f026c5c32ec3910f76351968a6f795aaf3bbb [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "experimental/ModelBuilder/MemRefUtils.h"
#include "experimental/ModelBuilder/ModelBuilder.h"
#include "experimental/ModelBuilder/ModelRunner.h"
using namespace mlir; // NOLINT
// Helper method to construct an affine map.
static AffineMap makeMap(ModelBuilder &mb, int i) {
SmallVector<AffineExpr, 4> results;
if (i == 2) {
results.push_back(getAffineDimExpr(0, mb.getContext()));
results.push_back(getAffineDimExpr(1, mb.getContext()));
} else {
results.push_back(getAffineDimExpr(i, mb.getContext()));
}
return AffineMap::get(2, 0, results);
}
// Helper method to build a NxN matrix-vector multiplication
// that runs I times to amortize any calling overhead.
template <unsigned N, unsigned I>
void buildMatMat(ModelBuilder &mb, StringLiteral fn) {
auto f32 = mb.f32;
auto nnVectorType = mb.getVectorType({N, N}, f32);
auto typeA = mb.getMemRefType({}, nnVectorType);
auto nVectorType = mb.getVectorType({N}, f32);
auto typeB = mb.getMemRefType({}, nVectorType);
auto typeC = typeB;
auto f = mb.makeFunction(fn, {}, {typeA, typeB, typeC});
OpBuilder b(&f.getBody());
ScopedContext scope(b, f.getLoc());
// Build the following accesses:
// affine_map<(i, j) -> (i, j)>,
// affine_map<(i, j) -> (j)>,
// affine_map<(i, j) -> (i)>
SmallVector<AffineMap, 4> accesses;
accesses.push_back(makeMap(mb, 2));
accesses.push_back(makeMap(mb, 1));
accesses.push_back(makeMap(mb, 0));
// Build the following iterator types:
// iterator_types = ["parallel", "reduction"]
SmallVector<Attribute, 4> iterator_types;
iterator_types.push_back(mb.getStringAttr("parallel"));
iterator_types.push_back(mb.getStringAttr("reduction"));
// Loop I times over the kernel to amortize calling overhead.
auto loop =
b.create<loop::ForOp>(f.getLoc(), std_constant_index(0),
std_constant_index(I), std_constant_index(1));
OpBuilder bodyBuilder = loop.getBodyBuilder();
{
edsc::ScopedContext bodyScope(bodyBuilder, f.getLoc());
// Compute c += A x b.
StdIndexedValue A(f.getArgument(0)), B(f.getArgument(1)),
C(f.getArgument(2));
C() = (vector_contract(*A(), *B(), *C(), mb.getAffineMapArrayAttr(accesses),
mb.getArrayAttr(iterator_types)));
}
std_ret();
}
// Benchmark method.
template <unsigned N>
void testMatVecUsingVectors(benchmark::State &state, StringLiteral funcName,
bool measureBuild) {
// Prepare arguments beforehand.
auto incInit = [](unsigned idx, Vector2D<N, N, float> *ptr) {
float *p = reinterpret_cast<float *>(ptr + idx);
for (unsigned i = 0; i < N * N; ++i) p[i] = 1.0f + i;
};
auto oneInit = [](unsigned idx, Vector1D<N, float> *ptr) {
float *p = reinterpret_cast<float *>(ptr + idx);
for (unsigned i = 0; i < N; ++i) p[i] = 1.0f;
};
auto zeroInit = [](unsigned idx, Vector1D<N, float> *ptr) {
float *p = reinterpret_cast<float *>(ptr + idx);
for (unsigned i = 0; i < N; ++i) p[i] = 0.0f;
};
auto A = makeInitializedStridedMemRefDescriptor<Vector2D<N, N, float>, 1>(
{1}, incInit);
auto B = makeInitializedStridedMemRefDescriptor<Vector1D<N, float>, 1>(
{1}, oneInit);
auto C = makeInitializedStridedMemRefDescriptor<Vector1D<N, float>, 1>(
{1}, zeroInit);
auto *bufferA = A.get();
auto *bufferB = B.get();
auto *bufferC = C.get();
void *args[3] = {&bufferA, &bufferB, &bufferC};
const std::string kFuncAdapterName =
(llvm::Twine("_mlir_ciface_") + funcName).str();
if (measureBuild) {
// If this is a build-time benchmark, build, compile, and execute
// the function inside the timed loop, building a fresh new function
// in each iteration to get the full JIT time (keep I == 1 here).
for (auto _ : state) {
ModelBuilder builder;
buildMatMat<N, 1>(builder, funcName);
ModelRunner runner(builder.getModuleRef());
runner.compile(CompilationOptions());
auto err = runner.engine->invoke(kFuncAdapterName,
MutableArrayRef<void *>{args});
if (err) llvm_unreachable("Error compiling/running function.");
}
} else {
// If this is a run-time benchmark, build, compile, and execute
// the function once outside the timed loop, then continue running
// the same function inside the loop to focus on actual runtime
// (set I == 1000 here to amortize calling overhead).
ModelBuilder builder;
buildMatMat<N, 1000>(builder, funcName);
ModelRunner runner(builder.getModuleRef());
runner.compile(CompilationOptions());
auto err =
runner.engine->invoke(kFuncAdapterName, MutableArrayRef<void *>{args});
if (err) llvm_unreachable("Error compiling/running function.");
for (auto _ : state) {
auto err_run = runner.engine->invoke(kFuncAdapterName,
MutableArrayRef<void *>{args});
if (err_run) llvm_unreachable("Error running function.");
}
}
}
//
// Benchmark drivers (build).
//
static void BM_Build_MatVec_1(benchmark::State &state) {
testMatVecUsingVectors<1>(state, "test_matvec_1", true);
}
BENCHMARK(BM_Build_MatVec_1);
static void BM_Build_MatVec_2(benchmark::State &state) {
testMatVecUsingVectors<2>(state, "test_matvec_2", true);
}
BENCHMARK(BM_Build_MatVec_2);
static void BM_Build_MatVec_4(benchmark::State &state) {
testMatVecUsingVectors<4>(state, "test_matvec_4", true);
}
BENCHMARK(BM_Build_MatVec_4);
static void BM_Build_MatVec_8(benchmark::State &state) {
testMatVecUsingVectors<8>(state, "test_matvec_8", true);
}
BENCHMARK(BM_Build_MatVec_8);
static void BM_Build_MatVec_16(benchmark::State &state) {
testMatVecUsingVectors<16>(state, "test_matvec_16", true);
}
BENCHMARK(BM_Build_MatVec_16);
static void BM_Build_MatVec_32(benchmark::State &state) {
testMatVecUsingVectors<32>(state, "test_matvec_32", true);
}
BENCHMARK(BM_Build_MatVec_32);
//
// Benchmark drivers (run).
//
static void BM_Run1000_MatVec_1(benchmark::State &state) {
testMatVecUsingVectors<1>(state, "test_matvec_1", false);
}
BENCHMARK(BM_Run1000_MatVec_1);
static void BM_Run1000_MatVec_2(benchmark::State &state) {
testMatVecUsingVectors<2>(state, "test_matvec_2", false);
}
BENCHMARK(BM_Run1000_MatVec_2);
static void BM_Run1000_MatVec_4(benchmark::State &state) {
testMatVecUsingVectors<4>(state, "test_matvec_4", false);
}
BENCHMARK(BM_Run1000_MatVec_4);
static void BM_Run1000_MatVec_8(benchmark::State &state) {
testMatVecUsingVectors<8>(state, "test_matvec_8", false);
}
BENCHMARK(BM_Run1000_MatVec_8);
static void BM_Run1000_MatVec_16(benchmark::State &state) {
testMatVecUsingVectors<16>(state, "test_matvec_16", false);
}
BENCHMARK(BM_Run1000_MatVec_16);
static void BM_Run1000_MatVec_32(benchmark::State &state) {
testMatVecUsingVectors<32>(state, "test_matvec_32", false);
}
BENCHMARK(BM_Run1000_MatVec_32);