|  | // Copyright 2020 Google LLC | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //      https://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | // MemRefUtils.h | 
|  | // ----------------------------------------------------------------------------- | 
|  | // | 
|  | // Utils for MLIR ABI interfacing with frameworks | 
|  | // | 
|  | // The templated free functions below make it possible to allocate dense | 
|  | // contiguous buffers with shapes that interoperate properly with the MLIR | 
|  | // codegen ABI. | 
|  | // | 
|  | // ``` | 
|  | //  // 1. Compile and build a model, prepare the runner. | 
|  | //  ModelRunner runner = ...; | 
|  | // | 
|  | //  // 2. Allocate managed input and outputs with proper shapes and init value. | 
//  auto inputLinearInit = [](unsigned idx, float *ptr) { ptr[idx] = 0.032460f; };
|  | //  auto inputBuffer = makeInitializedStridedMemRefDescriptor<float, 2>( | 
|  | //     {B, W0}, inputLinearInit); | 
//  auto outputLinearInit = [](unsigned idx, float *ptr) { ptr[idx] = 0.0f; };
|  | //  auto outputBuffer = makeInitializedStridedMemRefDescriptor<float, 2>( | 
|  | //     {B, W3}, outputLinearInit); | 
|  | // | 
|  | //  // 3. Pack pointers to MLIR ABI compliant buffers and call the named func. | 
|  | //  void *packedArgs[2] = {&inputBuffer->descriptor, &outputBuffer->descriptor}; | 
|  | //  runner.engine->invoke(funcName, llvm::MutableArrayRef<void *>{packedArgs}); | 
|  | // ``` | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <array> | 
|  | #include <cassert> | 
|  | #include <functional> | 
|  | #include <memory> | 
|  |  | 
|  | #include "llvm/ADT/Optional.h" | 
|  | #include "mlir/ExecutionEngine/CRunnerUtils.h" | 
|  |  | 
|  | #ifndef IREE_EXPERIMENTAL_MODELBUILDER_MEMREFUTILS_H_ | 
|  | #define IREE_EXPERIMENTAL_MODELBUILDER_MEMREFUTILS_H_ | 
|  |  | 
|  | namespace mlir { | 
// Signature of the allocation functions accepted by the helpers in this file:
// takes the number of bytes to allocate and returns a pointer to the storage.
// `::malloc` is the default wherever an AllocFunType parameter appears below.
using AllocFunType = std::function<void *(size_t)>;
|  |  | 
|  | namespace detail { | 
|  |  | 
// Computes the strides of a dense buffer of the given `shape`, with the last
// dimension contiguous. Each stride is the distance, in number of elements,
// between a slice in a dimension and the next slice in the same dimension.
//   e.g. shape[3, 4, 5] -> strides[20, 5, 1]
//
// Every size in `shape` must be strictly positive; this is checked with an
// assert. For N == 0 the (empty) shape is returned as the (empty) strides.
template <size_t N>
inline std::array<int64_t, N> makeStrides(const std::array<int64_t, N> &shape) {
  if (N == 0) return shape;
  std::array<int64_t, N> res;
  int64_t running = 1;
  // Walk from the innermost dimension outwards, accumulating the product of
  // the sizes seen so far. The index is signed so that the loop terminates
  // once it drops below zero.
  for (int64_t idx = static_cast<int64_t>(N) - 1; idx >= 0; --idx) {
    // Reject zero as well as negative sizes, as required by the contract.
    assert(shape[idx] > 0 && "size must be positive for all shape dimensions");
    res[idx] = running;
    running *= shape[idx];
  }
  return res;
}
|  |  | 
|  | // Mallocs a StridedMemRefDescriptor<T, N>* that matches the MLIR ABI. | 
|  | // This is an implementation detail that is kept in sync with MLIR codegen | 
|  | // conventions. | 
|  | template <typename T, int N> | 
|  | typename std::enable_if<(N >= 1), StridedMemRefType<T, N> *>::type | 
|  | makeStridedMemRefDescriptor(void *ptr, void *alignedPtr, | 
|  | const std::array<int64_t, N> &shape, | 
|  | AllocFunType alloc = &::malloc) { | 
|  | StridedMemRefType<T, N> *descriptor = static_cast<StridedMemRefType<T, N> *>( | 
|  | alloc(sizeof(StridedMemRefType<T, N>))); | 
|  | descriptor->basePtr = static_cast<T *>(ptr); | 
|  | descriptor->data = static_cast<T *>(alignedPtr); | 
|  | descriptor->offset = 0; | 
|  | std::copy(shape.begin(), shape.end(), descriptor->sizes); | 
|  | auto strides = makeStrides<N>(shape); | 
|  | std::copy(strides.begin(), strides.end(), descriptor->strides); | 
|  | return descriptor; | 
|  | } | 
|  |  | 
|  | // Mallocs a StridedMemRefDescriptor<T, 0>* (i.e. a pointer to scalar) that | 
|  | // matches the MLIR ABI. This is an implementation detail that is kept in sync | 
|  | // with MLIR codegen conventions. | 
|  | template <typename T, int N> | 
|  | typename std::enable_if<(N == 0), StridedMemRefType<T, 0> *>::type | 
|  | makeStridedMemRefDescriptor(void *ptr, void *alignedPtr, | 
|  | const std::array<int64_t, N> &shape = {}, | 
|  | AllocFunType alloc = &::malloc) { | 
|  | StridedMemRefType<T, 0> *descriptor = static_cast<StridedMemRefType<T, 0> *>( | 
|  | alloc(sizeof(StridedMemRefType<T, 0>))); | 
|  | descriptor->basePtr = static_cast<T *>(ptr); | 
|  | descriptor->data = static_cast<T *>(alignedPtr); | 
|  | descriptor->offset = 0; | 
|  | return descriptor; | 
|  | } | 
|  |  | 
|  | // Mallocs an UnrankedMemRefType<T>* that contains a ranked | 
|  | // StridedMemRefDescriptor<T, Rank>* and matches the MLIR ABI. This is an | 
|  | // implementation detail that is kept in sync with MLIR codegen conventions. | 
|  | template <typename T, int N> | 
|  | ::UnrankedMemRefType<T> *allocUnrankedDescriptor( | 
|  | void *data, void *alignedData, const std::array<int64_t, N> &shape, | 
|  | AllocFunType alloc = &::malloc) { | 
|  | ::UnrankedMemRefType<T> *res = static_cast<::UnrankedMemRefType<T> *>( | 
|  | alloc(sizeof(::UnrankedMemRefType<T>))); | 
|  | res->rank = N; | 
|  | res->descriptor = makeStridedMemRefDescriptor<T, N>(data, alignedData, shape); | 
|  | return res; | 
|  | } | 
|  |  | 
|  | // Frees an UnrankedMemRefType<T>* | 
|  | template <typename T> | 
|  | void freeUnrankedDescriptor(::UnrankedMemRefType<T> *desc) { | 
|  | free(desc->descriptor); | 
|  | free(desc); | 
|  | } | 
|  |  | 
|  | }  // namespace detail | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // Public API | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
// Inefficient initializer called once per element by
// `makeInitializedUnrankedDescriptor` and
// `makeInitializedStridedMemRefDescriptor`. It receives the linear index `idx`
// of the element and `ptr`, which those functions always pass as the start of
// the aligned buffer (not the address of element `idx`), so an initializer
// writes element `idx` as `ptr[idx]`. The shape is not passed in; the user can
// capture the shape and delinearize `idx` if appropriate.
template <typename T>
using LinearInitializer = std::function<void(unsigned idx, T *ptr)>;
|  |  | 
// Rounds `val` up to the nearest power of two; a value that already is a power
// of two is returned unchanged. `val` must be non-zero (checked by assert).
// The computation is done in 32 bits, so inputs above 2^31 wrap around to 0.
inline uint32_t pow2msb(uint32_t val) {
  assert(val > 0);
  // Smear the highest set bit of `val - 1` into every lower bit position, then
  // add one to land on the next power of two.
  uint32_t smeared = val - 1;
  for (unsigned shift = 1; shift <= 16; shift *= 2) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
|  |  | 
|  | // No such thing as a portable posize_memalign, roll our own. | 
|  | // [alignment] allow to specify an arbitrary alignment. It must be a power of 2 | 
|  | // and greater than the size of T. By default the alignment is sizeof(T). | 
|  | template <typename T> | 
|  | std::pair<void *, void *> allocAligned( | 
|  | size_t nElements, AllocFunType alloc = &::malloc, | 
|  | llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>()) { | 
|  | assert(sizeof(T) < (1ul << 32) && "Elemental type overflows"); | 
|  | auto size = nElements * sizeof(T); | 
|  | auto desiredAlignment = alignment.getValueOr(pow2msb(sizeof(T))); | 
|  | assert((desiredAlignment & (desiredAlignment - 1)) == 0); | 
|  | assert(desiredAlignment >= sizeof(T)); | 
|  | void *data = alloc(size + desiredAlignment); | 
|  | uintptr_t addr = reinterpret_cast<uintptr_t>(data); | 
|  | uintptr_t rem = addr % desiredAlignment; | 
|  | void *alignedData = | 
|  | (rem == 0) ? data | 
|  | : reinterpret_cast<void *>(addr + (desiredAlignment - rem)); | 
|  | assert(reinterpret_cast<uintptr_t>(alignedData) % desiredAlignment == 0); | 
|  | return std::pair<void *, void *>(data, alignedData); | 
|  | } | 
|  |  | 
|  | // Entry point to allocate a dense buffer with a given `shape` and initializer | 
|  | // of type PointwiseInitializer. Can optionally take specific `alloc` and `free` | 
|  | // functions. | 
|  | template <typename T, int N, typename FreeFunType = decltype(&::free)> | 
|  | std::unique_ptr<::UnrankedMemRefType<float>, FreeFunType> | 
|  | makeInitializedUnrankedDescriptor( | 
|  | const std::array<int64_t, N> &shape, LinearInitializer<T> init, | 
|  | llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>(), | 
|  | AllocFunType alloc = &::malloc, FreeFunType freeFun = &::free) { | 
|  | int64_t nElements = 1; | 
|  | for (int64_t s : shape) nElements *= s; | 
|  | auto allocated = allocAligned<T>(nElements, alloc, alignment); | 
|  | auto *data = static_cast<T *>(allocated.first); | 
|  | auto *alignedData = static_cast<T *>(allocated.second); | 
|  | for (unsigned i = 0; i < nElements; ++i) init(i, alignedData); | 
|  | return std::unique_ptr<::UnrankedMemRefType<float>, FreeFunType>( | 
|  | detail::allocUnrankedDescriptor<T, N>(data, alignedData, shape), freeFun); | 
|  | } | 
|  |  | 
|  | // Entry point to allocate a dense buffer with a given `shape` and initializer | 
|  | // of type PointwiseInitializer. Can optionally take specific `alloc` and `free` | 
|  | // functions. | 
|  | template <typename T, int N, typename FreeFunType = decltype(&::free)> | 
|  | std::unique_ptr<StridedMemRefType<T, N>, FreeFunType> | 
|  | makeInitializedStridedMemRefDescriptor( | 
|  | const std::array<int64_t, N> &shape, LinearInitializer<T> init, | 
|  | llvm::Optional<uint64_t> alignment = llvm::Optional<uint64_t>(), | 
|  | AllocFunType alloc = &::malloc, FreeFunType freeFun = &::free) { | 
|  | int64_t nElements = 1; | 
|  | for (int64_t s : shape) nElements *= s; | 
|  | auto allocated = allocAligned<T>(nElements, alloc, alignment); | 
|  | auto *data = static_cast<T *>(allocated.first); | 
|  | auto *alignedData = static_cast<T *>(allocated.second); | 
|  | for (unsigned i = 0; i < nElements; ++i) init(i, alignedData); | 
|  | return std::unique_ptr<StridedMemRefType<T, N>, FreeFunType>( | 
|  | detail::makeStridedMemRefDescriptor<T, N>(data, alignedData, shape, | 
|  | alloc), | 
|  | freeFun); | 
|  | } | 
|  |  | 
|  | }  // namespace mlir | 
|  |  | 
|  | #endif  // IREE_EXPERIMENTAL_MODELBUILDER_MEMREFUTILS_H_ |