| // Copyright 2020 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // RUN: test-mnist-jit 2>&1 | IreeFileCheck %s |
| |
| #include "experimental/ModelBuilder/ModelBuilder.h" |
| #include "experimental/ModelBuilder/ModelRunner.h" |
// RunnerUtils.h (together with iostream) is currently needed for printMemRef.
| #include "mlir/ExecutionEngine/RunnerUtils.h" |
| |
| using namespace mlir; // NOLINT |
| |
| // Helper function to build a func "mnist" that takes a memref<?x784xf32> buffer |
| // (use batch size B=3 in this example). |
| // |
| // This is a 3-layer MLP with static weights of sizes W0xW1, W1xW2 and W2xW3. |
// Each fully-connected layer is followed by a non-linearity fused with the
// bias addition.
| // |
| // The fused non-linearity computes: |
| // `0.5f * tanh(0.5f * (x + bias)) + 0.5f` |
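// which is algebraically the logistic sigmoid `1 / (1 + exp(-(x + bias)))`.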
| // |
// Most of the code below is about allocating and initializing buffers for the
// model weights and biases, plus cleaning up on exit. Most of this will
// disappear once we start from a tensor abstraction and can automatically
// attach to a preloaded model in memory.
| // |
// The interesting part of the model is the 3-layer MLP:
| // ``` |
| // modelBuilder.FCBiasTanh({input, h1Weights, outputBlock1}, bias1); |
| // modelBuilder.FCBiasTanh({outputBlock1, h2Weights, outputBlock2}, bias2); |
| // modelBuilder.FCBiasTanh({outputBlock2, h3Weights, outputBlock3}, bias3); |
| // ``` |
| void buildMNIST(ModelBuilder &modelBuilder, StringLiteral funcName, unsigned B, |
| unsigned W0, unsigned W1, unsigned W2, unsigned W3) { |
| auto f32 = modelBuilder.f32; |
| auto inputType = modelBuilder.getMemRefType({-1, W0}, f32); |
| auto outputType = modelBuilder.getMemRefType({-1, W3}, f32); |
| auto func = |
| modelBuilder.makeFunction(funcName, {}, {inputType, outputType}, |
| MLIRFuncOpConfig().setEmitCInterface(true)); |
| |
// Fill the body (3 blocks of FCBiasTanh); allocate everything manually for now.
| OpBuilder b(&func.getBody()); |
| ScopedContext scope(b, func.getLoc()); |
| Value input = func.getArgument(0); |
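// Read the dynamic batch dimension from the input; it is used below to size
// the intermediate activation buffers.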
| Value batchSize = std_dim(input, 0); |
| Value h1Weights = std_alloc(modelBuilder.getMemRefType({W0, W1}, f32)); |
| Value h2Weights = std_alloc(modelBuilder.getMemRefType({W1, W2}, f32)); |
| Value h3Weights = std_alloc(modelBuilder.getMemRefType({W2, W3}, f32)); |
| Value bias1 = std_alloc(modelBuilder.getMemRefType({W1}, f32)); |
| Value bias2 = std_alloc(modelBuilder.getMemRefType({W2}, f32)); |
| Value bias3 = std_alloc(modelBuilder.getMemRefType({W3}, f32)); |
| Value outputBlock1 = |
| std_alloc(modelBuilder.getMemRefType({-1, W1}, f32), batchSize); |
| Value outputBlock2 = |
| std_alloc(modelBuilder.getMemRefType({-1, W2}, f32), batchSize); |
| Value outputBlock3 = func.getArgument(1); |
| |
| Value flt_0 = modelBuilder.constant_f32(0.0f); |
| Value someVal = modelBuilder.constant_f32(0.1123f); |
| linalg_fill(h1Weights, someVal); |
| linalg_fill(h2Weights, someVal); |
| linalg_fill(h3Weights, someVal); |
| linalg_fill(bias1, someVal); |
| linalg_fill(bias2, someVal); |
| linalg_fill(bias3, someVal); |
| linalg_fill(outputBlock1, flt_0); |
| linalg_fill(outputBlock2, flt_0); |
| |
| modelBuilder.FCBiasTanh({input, h1Weights, outputBlock1}, bias1); |
| modelBuilder.FCBiasTanh({outputBlock1, h2Weights, outputBlock2}, bias2); |
| modelBuilder.FCBiasTanh({outputBlock2, h3Weights, outputBlock3}, bias3); |
| |
// TODO(ntv): tensor->buffer conversion; drop all alloc/fill/dealloc.
// Extra parentheses work around C++'s most vexing parse.
| (std_dealloc(h1Weights)); |
| (std_dealloc(h2Weights)); |
| (std_dealloc(h3Weights)); |
| (std_dealloc(bias1)); |
| (std_dealloc(bias2)); |
| (std_dealloc(bias3)); |
| (std_dealloc(outputBlock1)); |
| (std_dealloc(outputBlock2)); |
| |
| (std_ret()); |
| } |
| |
// Helper function to build a func `funcName` that takes the input as a
// `tensor<?x784xf32>` as well as statically-shaped tensors for all the
| // weights and biases. |
| // |
| // This is the counterpart of `buildMNIST` which builds a similar model on |
| // buffers. |
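//
// The interesting part mirrors the buffer version, except that each
// FCBiasTanhTensors call returns a fresh tensor value instead of writing into
// a pre-allocated output buffer:
// ```
//   auto outputBlock1 = modelBuilder.FCBiasTanhTensors(outputBlock1Type,
//                                                      {input, h1Weights}, bias1);
//   ...
//   (std_ret(outputBlock3));
// ```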
| void buildMNISTOnTensors(ModelBuilder &modelBuilder, StringLiteral funcName, |
| int64_t B, int64_t W0, int64_t W1, int64_t W2, |
| int64_t W3) { |
| auto f32 = modelBuilder.f32; |
| auto inputType = modelBuilder.getRankedTensorType({B, W0}, f32); |
| auto h1WeightsType = modelBuilder.getRankedTensorType({W0, W1}, f32); |
| auto h2WeightsType = modelBuilder.getRankedTensorType({W1, W2}, f32); |
| auto h3WeightsType = modelBuilder.getRankedTensorType({W2, W3}, f32); |
| auto bias1Type = modelBuilder.getRankedTensorType({W1}, f32); |
| auto bias2Type = modelBuilder.getRankedTensorType({W2}, f32); |
| auto bias3Type = modelBuilder.getRankedTensorType({W3}, f32); |
| auto outputType = modelBuilder.getRankedTensorType({B, W3}, f32); |
| auto func = modelBuilder.makeFunction( |
| funcName, {outputType}, |
| {inputType, h1WeightsType, h2WeightsType, h3WeightsType, bias1Type, |
| bias2Type, bias3Type}); |
| Value input = func.getArgument(0); |
| Value h1Weights = func.getArgument(1); |
| Value h2Weights = func.getArgument(2); |
| Value h3Weights = func.getArgument(3); |
| Value bias1 = func.getArgument(4); |
| Value bias2 = func.getArgument(5); |
| Value bias3 = func.getArgument(6); |
| |
// Fill the body (3 blocks of FCBiasTanhTensors); no manual allocation is
// needed on tensors.
| OpBuilder b(&func.getBody()); |
| ScopedContext scope(b, func.getLoc()); |
| |
| auto outputBlock1Type = modelBuilder.getRankedTensorType({B, W1}, f32); |
| auto outputBlock1 = modelBuilder.FCBiasTanhTensors(outputBlock1Type, |
| {input, h1Weights}, bias1); |
| auto outputBlock2Type = modelBuilder.getRankedTensorType({B, W2}, f32); |
| auto outputBlock2 = modelBuilder.FCBiasTanhTensors( |
| outputBlock2Type, {outputBlock1, h2Weights}, bias2); |
| auto outputBlock3Type = outputType; |
| auto outputBlock3 = modelBuilder.FCBiasTanhTensors( |
| outputBlock3Type, {outputBlock2, h3Weights}, bias3); |
// Parenthesized to avoid the most vexing parse.
| (std_ret(outputBlock3)); |
| } |
| |
| int main() { |
| ModelBuilder::registerAllDialects(); |
| constexpr unsigned B = 3, W0 = 784, W1 = 256, W2 = 256, W3 = 10; |
| |
| ModelBuilder modelBuilder; |
| // 1. Build a func "test_mnist_jit_tensors". |
| constexpr StringLiteral kFuncTensorsName = "test_mnist_jit_tensors"; |
| buildMNISTOnTensors(modelBuilder, kFuncTensorsName, ShapedType::kDynamicSize, |
| W0, W1, W2, W3); |
| // 1.b. Dump the function for testing and erase it: we can't compile it to |
| // buffers for now. |
| modelBuilder.getModuleRef()->dump(); |
| SymbolTable::lookupNearestSymbolFrom( |
| modelBuilder.getModuleRef()->getOperation(), kFuncTensorsName) |
| ->erase(); |
| |
// 2. Build a separate func "test_mnist_jit_buffers" that takes a
// memref<?x784xf32> buffer (the batch size B is 3 in this example).
// In the future, when we can lower the function built in 1. to buffers, we
// will reuse it here instead.
| constexpr StringLiteral kFuncBuffersName = "test_mnist_jit_buffers"; |
| buildMNIST(modelBuilder, kFuncBuffersName, B, W0, W1, W2, W3); |
| |
| // 3. Compile the function. |
| ModelRunner runner(modelBuilder.getModuleRef()); |
| runner.compile(CompilationOptions()); |
| |
| // 4. Allocate data within data structures that interoperate with the MLIR ABI |
| // conventions used by codegen. |
| auto inputLinearInit = [](unsigned idx, float *ptr) { |
| *(ptr + idx) = 0.032460f; |
| }; |
| // Exercise the ranked strided memref descriptor. |
| auto inputBuffer = makeInitializedStridedMemRefDescriptor<float, 2>( |
| {B, W0}, inputLinearInit); |
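// As a rough sketch (not a contract), the ranked descriptor created above
// mirrors StridedMemRefType from mlir/ExecutionEngine/CRunnerUtils.h:
// ```
//   template <typename T, int N> struct StridedMemRefType {
//     T *basePtr;          // allocated pointer
//     T *data;             // aligned pointer actually indexed
//     int64_t offset;
//     int64_t sizes[N];    // here: {B, W0}
//     int64_t strides[N];  // here: {W0, 1} for a contiguous row-major buffer
//   };
// ```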
| auto outputLinearInit = [](unsigned idx, float *ptr) { *(ptr + idx) = 0.0f; }; |
| // Exercise the unranked memref descriptor, with extra level of indirection. |
| auto outputBuffer = |
| makeInitializedUnrankedDescriptor<float, 2>({B, W3}, outputLinearInit); |
| |
// 5. Call the funcOp named `kFuncBuffersName` with the prepared arguments.
| auto *inputDescriptor = inputBuffer.get(); |
| void *args[] = {&inputDescriptor, &outputBuffer->descriptor}; |
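// Each `args` entry points at the corresponding argument (hence the extra
// level of indirection on the descriptor pointers), following the packed
// calling convention expected by the emitted C-interface wrappers.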
| auto error = runner.invokeIndirect(kFuncBuffersName, args); |
| |
| // 6. Dump content of output buffer for testing with FileCheck. |
| if (error) { |
| runner.module->dump(); |
| llvm::errs() << "ERROR: " << error << "\n"; |
| return 1; |
| } |
| ::impl::printMemRef( |
| *static_cast<StridedMemRefType<float, 2> *>(outputBuffer->descriptor)); |
| } |
| |
| // For now, we can only dump the IR for `test_mnist_jit_tensors`. |
// Once buffer allocation is implemented, we will only have an execution test.
| // |
| // CHECK: func @test_mnist_jit_tensors |
| // |
| // Matmul |
| // CHECK: linalg.generic |
| // CHECK: tensor<?x784xf32>, tensor<784x256xf32> -> tensor<?x256xf32> |
| // |
| // Pointwise |
| // CHECK: linalg.generic |
| // CHECK: addf |
| // CHECK: mulf |
| // CHECK: tanh |
| // CHECK: mulf |
| // CHECK: addf |
| // CHECK: addf |
| // CHECK: tensor<?x256xf32>, tensor<256xf32> -> tensor<?x256xf32> |
| // |
| // Matmul |
| // CHECK: linalg.generic |
| // CHECK: tensor<?x256xf32>, tensor<256x256xf32> -> tensor<?x256xf32> |
| // |
| // Pointwise |
| // CHECK: linalg.generic |
| // CHECK: tensor<?x256xf32>, tensor<256xf32> -> tensor<?x256xf32> |
| // |
| // Matmul |
| // CHECK: linalg.generic |
| // CHECK: tensor<?x256xf32>, tensor<256x10xf32> -> tensor<?x10xf32> |
| // |
| // Pointwise |
| // CHECK: linalg.generic |
| // CHECK: tensor<?x10xf32>, tensor<10xf32> -> tensor<?x10xf32> |
| // CHECK: return {{.*}} : tensor<?x10xf32> |
| |
| // Execution test for `test_mnist_jit_buffers`. |
| // |
| // CHECK: Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [3, 10] |
| // CHECK-SAME: strides = [10, 1] data = |
| // clang-format off |
| // CHECK-COUNT-3: {{.*[[:space:]].*}}[3177.93, 3177.93, 3177.93, 3177.93, 3177.93, 3177.93, 3177.93, 3177.93, 3177.93, 3177.93] |
| // clang-format on |