[ModelBuilder] JIT/RUN benchmark for matrix x vector using Vector dialect
PiperOrigin-RevId: 303166550
diff --git a/experimental/ModelBuilder/test/BUILD b/experimental/ModelBuilder/test/BUILD
index c3a7f92..f34fb38 100644
--- a/experimental/ModelBuilder/test/BUILD
+++ b/experimental/ModelBuilder/test/BUILD
@@ -90,6 +90,25 @@
)
# Benchmarks.
+
+# Matrix x Vector expressed in Vector dialect (JIT build and run benchmarks).
+cc_binary(
+    name = "bench-matvec-vector-jit",
+    srcs = ["BenchMatVecVectorJIT.cpp"],
+    tags = [
+        "noga",
+    ],
+    deps = [
+        "//experimental/ModelBuilder",
+        "//experimental/ModelBuilder:ModelRunner",
+        "@com_google_benchmark//:benchmark_main",  # Source defines no main().
+        "@llvm-project//mlir:AllPassesAndDialects",
+        "@llvm-project//mlir:EDSC",
+        "@llvm-project//mlir:IR",
+    ],
+)
+
+# Matrix x Matrix expressed in Vector dialect.
cc_binary(
name = "bench-matmul-vector-jit",
srcs = ["BenchMatMulVectorJIT.cpp"],
@@ -106,6 +125,7 @@
],
)
+# Matrix x Matrix expressed in Vector dialect (using intrinsics).
cc_binary(
name = "bench-matmul-vector-column-major-llvm-intrinsics-jit",
srcs = ["BenchMatMulVectorColumnMajorLLVMIntrinsicsJIT.cpp"],
diff --git a/experimental/ModelBuilder/test/BenchMatVecVectorJIT.cpp b/experimental/ModelBuilder/test/BenchMatVecVectorJIT.cpp
new file mode 100644
index 0000000..c59f026
--- /dev/null
+++ b/experimental/ModelBuilder/test/BenchMatVecVectorJIT.cpp
@@ -0,0 +1,211 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+#include "experimental/ModelBuilder/MemRefUtils.h"
+#include "experimental/ModelBuilder/ModelBuilder.h"
+#include "experimental/ModelBuilder/ModelRunner.h"
+
+using namespace mlir; // NOLINT
+
+// Helper returning a map over (d0, d1): the identity (d0, d1) -> (d0, d1)
+// when i == 2, and the projection (d0, d1) -> (d_i) for any other i.
+static AffineMap makeMap(ModelBuilder &mb, int i) {
+  auto *context = mb.getContext();
+  SmallVector<AffineExpr, 4> exprs;
+  const bool identity = (i == 2);
+  // The identity case lists d0 then d1; otherwise d_i is the only result.
+  exprs.push_back(getAffineDimExpr(identity ? 0 : i, context));
+  if (identity) exprs.push_back(getAffineDimExpr(1, context));
+  return AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, exprs);
+}
+
+// Builds function `fn`, which runs C += A x B (A: vector<NxNxf32>; B and C:
+// vector<Nxf32>, i.e. matrix x vector despite the name) I times per call.
+template <unsigned N, unsigned I>
+void buildMatMat(ModelBuilder &mb, StringLiteral fn) {
+ auto f32 = mb.f32;
+ auto nnVectorType = mb.getVectorType({N, N}, f32);
+ auto typeA = mb.getMemRefType({}, nnVectorType);
+ auto nVectorType = mb.getVectorType({N}, f32);
+ auto typeB = mb.getMemRefType({}, nVectorType);
+ auto typeC = typeB;
+
+ auto f = mb.makeFunction(fn, {}, {typeA, typeB, typeC});
+ OpBuilder b(&f.getBody());
+ ScopedContext scope(b, f.getLoc());
+
+ // Build the accesses for A, B, and C (in that order):
+ // affine_map<(i, j) -> (i, j)>,
+ // affine_map<(i, j) -> (j)>,
+ // affine_map<(i, j) -> (i)>
+ SmallVector<AffineMap, 4> accesses;
+ accesses.push_back(makeMap(mb, 2));
+ accesses.push_back(makeMap(mb, 1));
+ accesses.push_back(makeMap(mb, 0));
+
+ // Build the iterator types for (i, j), i.e. j is the reduced dimension:
+ // iterator_types = ["parallel", "reduction"]
+ SmallVector<Attribute, 4> iterator_types;
+ iterator_types.push_back(mb.getStringAttr("parallel"));
+ iterator_types.push_back(mb.getStringAttr("reduction"));
+
+ // Loop over the kernel for [0, I) with step 1 to amortize calling overhead.
+ auto loop =
+ b.create<loop::ForOp>(f.getLoc(), std_constant_index(0),
+ std_constant_index(I), std_constant_index(1));
+
+ OpBuilder bodyBuilder = loop.getBodyBuilder();
+ {
+ edsc::ScopedContext bodyScope(bodyBuilder, f.getLoc());
+ // Compute C += A x B with a single vector contraction on the arguments.
+ StdIndexedValue A(f.getArgument(0)), B(f.getArgument(1)),
+ C(f.getArgument(2));
+ C() = (vector_contract(*A(), *B(), *C(), mb.getAffineMapArrayAttr(accesses),
+ mb.getArrayAttr(iterator_types)));
+ }
+
+ std_ret();
+}
+
+// Benchmarks the JIT-ed NxN matrix x vector kernel named `funcName`: times
+// build + compile + run of a fresh function per iteration if `measureBuild`,
+// otherwise times only invocations of a function compiled once up front.
+template <unsigned N>
+void testMatVecUsingVectors(benchmark::State &state, StringLiteral funcName,
+                            bool measureBuild) {
+  // Prepare arguments beforehand.
+  auto incInit = [](unsigned idx, Vector2D<N, N, float> *ptr) {
+    float *p = reinterpret_cast<float *>(ptr + idx);
+    for (unsigned i = 0; i < N * N; ++i) p[i] = 1.0f + i;
+  };
+  auto oneInit = [](unsigned idx, Vector1D<N, float> *ptr) {
+    float *p = reinterpret_cast<float *>(ptr + idx);
+    for (unsigned i = 0; i < N; ++i) p[i] = 1.0f;
+  };
+  auto zeroInit = [](unsigned idx, Vector1D<N, float> *ptr) {
+    float *p = reinterpret_cast<float *>(ptr + idx);
+    for (unsigned i = 0; i < N; ++i) p[i] = 0.0f;
+  };
+  auto A = makeInitializedStridedMemRefDescriptor<Vector2D<N, N, float>, 1>(
+      {1}, incInit);
+  auto B = makeInitializedStridedMemRefDescriptor<Vector1D<N, float>, 1>(
+      {1}, oneInit);
+  auto C = makeInitializedStridedMemRefDescriptor<Vector1D<N, float>, 1>(
+      {1}, zeroInit);
+  auto *bufferA = A.get();
+  auto *bufferB = B.get();
+  auto *bufferC = C.get();
+  void *args[3] = {&bufferA, &bufferB, &bufferC};
+  const std::string kFuncAdapterName =
+      (llvm::Twine("_mlir_ciface_") + funcName).str();
+  // Runs the JIT-ed function and aborts on failure; report_fatal_error stays
+  // active in optimized builds, where llvm_unreachable is only a hint.
+  auto invokeOrDie = [&](ModelRunner &runner, const char *message) {
+    auto err =
+        runner.engine->invoke(kFuncAdapterName, MutableArrayRef<void *>{args});
+    if (err) llvm::report_fatal_error(message);
+  };
+
+  if (measureBuild) {
+    // Time the full JIT pipeline: build, compile, and execute a fresh
+    // function in every iteration (keep I == 1 here).
+    for (auto _ : state) {
+      ModelBuilder builder;
+      buildMatMat<N, 1>(builder, funcName);
+      ModelRunner runner(builder.getModuleRef());
+      runner.compile(CompilationOptions());
+      invokeOrDie(runner, "Error compiling/running function.");
+    }
+  } else {
+    // Build, compile, and execute once outside the timed loop so that it only
+    // measures run time (set I == 1000 here to amortize calling overhead).
+    ModelBuilder builder;
+    buildMatMat<N, 1000>(builder, funcName);
+    ModelRunner runner(builder.getModuleRef());
+    runner.compile(CompilationOptions());
+    invokeOrDie(runner, "Error compiling/running function.");
+    for (auto _ : state) {
+      invokeOrDie(runner, "Error running function.");
+    }
+  }
+}
+
+//
+// Build-time drivers: each timed iteration JITs and runs a fresh kernel.
+//
+
+static void BM_Build_MatVec_1(benchmark::State &st) {
+  testMatVecUsingVectors<1>(st, "test_matvec_1", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_1);
+
+static void BM_Build_MatVec_2(benchmark::State &st) {
+  testMatVecUsingVectors<2>(st, "test_matvec_2", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_2);
+
+static void BM_Build_MatVec_4(benchmark::State &st) {
+  testMatVecUsingVectors<4>(st, "test_matvec_4", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_4);
+
+static void BM_Build_MatVec_8(benchmark::State &st) {
+  testMatVecUsingVectors<8>(st, "test_matvec_8", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_8);
+
+static void BM_Build_MatVec_16(benchmark::State &st) {
+  testMatVecUsingVectors<16>(st, "test_matvec_16", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_16);
+
+static void BM_Build_MatVec_32(benchmark::State &st) {
+  testMatVecUsingVectors<32>(st, "test_matvec_32", /*measureBuild=*/true);
+}
+BENCHMARK(BM_Build_MatVec_32);
+
+//
+// Run-time drivers: the kernel is JIT-ed once, only invocations are timed.
+//
+
+static void BM_Run1000_MatVec_1(benchmark::State &st) {
+  testMatVecUsingVectors<1>(st, "test_matvec_1", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_1);
+
+static void BM_Run1000_MatVec_2(benchmark::State &st) {
+  testMatVecUsingVectors<2>(st, "test_matvec_2", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_2);
+
+static void BM_Run1000_MatVec_4(benchmark::State &st) {
+  testMatVecUsingVectors<4>(st, "test_matvec_4", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_4);
+
+static void BM_Run1000_MatVec_8(benchmark::State &st) {
+  testMatVecUsingVectors<8>(st, "test_matvec_8", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_8);
+
+static void BM_Run1000_MatVec_16(benchmark::State &st) {
+  testMatVecUsingVectors<16>(st, "test_matvec_16", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_16);
+
+static void BM_Run1000_MatVec_32(benchmark::State &st) {
+  testMatVecUsingVectors<32>(st, "test_matvec_32", /*measureBuild=*/false);
+}
+BENCHMARK(BM_Run1000_MatVec_32);