iree/compiler/Conversion/LinalgToSPIRV/Utils.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2020 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 //===- Utils.cpp - Utility functions used in Linalg to SPIR-V lowering ----===//
 //
 // Implementation of utility functions used while lowering from Linalg to SPIRV.
 //
 //===----------------------------------------------------------------------===//

 #include "iree/compiler/Conversion/LinalgToSPIRV/Utils.h"

 #include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h"
 #include "iree/compiler/Conversion/LinalgToSPIRV/MemorySpace.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/SPIRV/TargetAndABI.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/Identifier.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/Region.h"
 #include "mlir/Support/LogicalResult.h"

 namespace mlir {
 namespace iree_compiler {

 /// Records `workGroupSize` on `funcOp` as its SPIR-V entry point ABI attribute.
 ///
 /// Emits an error on `funcOp` and returns it when the function body does not
 /// consist of exactly one block, or when `workGroupSize` does not have exactly
 /// three entries. Each entry is converted to `int32_t` before being handed to
 /// the attribute builder.
 LogicalResult updateWorkGroupSize(FuncOp funcOp,
                                   ArrayRef<int64_t> workGroupSize) {
   // Only single-block dispatch functions are handled.
   if (!llvm::hasSingleElement(funcOp.getBody()))
     return funcOp.emitError("unhandled dispatch function with multiple blocks");

   if (workGroupSize.size() != 3)
     return funcOp.emitError("expected workgroup size to have three entries");

   // Narrow every extent to the 32-bit element type passed to the ABI helper.
   SmallVector<int32_t, 3> localSize;
   for (int64_t extent : workGroupSize)
     localSize.push_back(static_cast<int32_t>(extent));

   funcOp.setAttr(spirv::getEntryPointABIAttrName(),
                  spirv::getEntryPointABIAttr(localSize, funcOp.getContext()));
   return success();
 }

 /// Creates a `linalg::CopyOp` from `src` to `dst` at the location of `src` and
 /// tags it with the marker returned by getCopyToWorkgroupMemoryMarker().
 /// Always returns success.
 LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst) {
   Location copyLoc = src.getLoc();
   linalg::CopyOp workgroupCopy = b.create<linalg::CopyOp>(copyLoc, src, dst);
   setMarker(workgroupCopy, getCopyToWorkgroupMemoryMarker());
   return success();
 }

 /// Allocates a buffer in workgroup memory shaped by `boundingSubViewSize`.
 ///
 /// Every entry of `boundingSubViewSize` must match a constant integer; its
 /// sign-extended value becomes the corresponding dimension of the allocation.
 /// If any entry is not a constant, or is the constant -1, no op is created and
 /// an empty Optional is returned. Otherwise an `AllocOp` is created at the
 /// location of `subview`, with the element type of `subview` and the memory
 /// space given by getWorkgroupMemorySpace(), and its result is returned.
 ///
 /// `folder` is not used.
 Optional<Value> allocateWorkgroupMemory(OpBuilder &b, SubViewOp subview,
                                         ArrayRef<Value> boundingSubViewSize,
                                         OperationFolder *folder) {
   // Collect the static extents, bailing out on the first one that cannot be
   // used as an allocation dimension.
   SmallVector<int64_t, 2> staticExtents;
   for (Value extent : boundingSubViewSize) {
     APInt constant;
     if (!matchPattern(extent, m_ConstantInt(&constant))) return {};
     int64_t size = constant.getSExtValue();
     if (size == -1) return {};
     staticExtents.push_back(size);
   }

   Type elementType = subview.getType().getElementType();
   MemRefType workgroupType = MemRefType::get(staticExtents, elementType, {},
                                              getWorkgroupMemorySpace());
   Value workgroupBuffer = b.create<AllocOp>(subview.getLoc(), workgroupType);
   return workgroupBuffer;
 }

 /// Creates a `DeallocOp` for `buffer`, located at the `AllocOp` that defines it.
 ///
 /// Returns failure without creating anything when `buffer` is not the result of
 /// an `AllocOp` (for example a block argument), since there is then no
 /// allocation to take the location from. Returns success otherwise.
 LogicalResult deallocateWorkgroupMemory(OpBuilder &b, Value buffer) {
   auto allocOp = buffer.getDefiningOp<AllocOp>();
   // getDefiningOp yields a null op when the value has a different producer;
   // calling getLoc() on it would dereference null.
   if (!allocOp) return failure();
   b.create<DeallocOp>(allocOp.getLoc(), buffer);
   return success();
 }

 /// Builds the processor id and processor count for GPU dimension `dim` by
 /// creating a `GPUIdOp` followed by a `GPUCountOp`, both of index type, at
 /// `loc`.
 ///
 /// `dim` selects the dimension attribute from the {"x", "y", "z"} table. A
 /// `dim` beyond the last table entry is clamped to the last entry.
 template <typename GPUIdOp, typename GPUCountOp>
 static linalg::ProcInfo getGPUProcessorIdAndCountImpl(OpBuilder &builder,
                                                       Location loc,
                                                       unsigned dim) {
   std::array<StringRef, kNumGPUDims> dimAttr{"x", "y", "z"};
   // Clamp to the last valid index. Clamping to kNumGPUDims itself would read
   // one element past the end of `dimAttr`.
   StringAttr attr = builder.getStringAttr(
       dimAttr[std::min<unsigned>(dim, kNumGPUDims - 1)]);
   Type indexType = builder.getIndexType();
   return {builder.create<GPUIdOp>(loc, indexType, attr),
           builder.create<GPUCountOp>(loc, indexType, attr)};
 }

 /// Specialization for (GPUGlobalId, GPUGlobalCount): builds the global id and
 /// global count along GPU dimension `dim` from the gpu dialect ops.
 ///
 /// The id is computed as `blockId * blockDim + threadId` and the count as
 /// `blockDim * gridDim`, all of index type and created at `loc`.
 ///
 /// `dim` selects the dimension attribute from the {"x", "y", "z"} table. A
 /// `dim` beyond the last table entry is clamped to the last entry.
 template <>
 linalg::ProcInfo getGPUProcessorIdAndCountImpl<GPUGlobalId, GPUGlobalCount>(
     OpBuilder &builder, Location loc, unsigned dim) {
   std::array<StringRef, kNumGPUDims> dimAttr{"x", "y", "z"};
   // Clamp to the last valid index. Clamping to kNumGPUDims itself would read
   // one element past the end of `dimAttr`.
   StringAttr attr = builder.getStringAttr(
       dimAttr[std::min<unsigned>(dim, kNumGPUDims - 1)]);
   Type indexType = builder.getIndexType();
   Value gridDim = builder.create<gpu::GridDimOp>(loc, indexType, attr);
   Value blockId = builder.create<gpu::BlockIdOp>(loc, indexType, attr);
   Value blockDim = builder.create<gpu::BlockDimOp>(loc, indexType, attr);
   Value threadId = builder.create<gpu::ThreadIdOp>(loc, indexType, attr);
   // TODO(ravishankarm): Using affine_maps here would be beneficial, and we can
   // do this because the blockDim is constant. But this would lead to an
   // ordering issue cause it assumes that the workgroup size has already been
   // set. If using affine_map can help, make sure that the workgroup size is set
   // before.
   return {builder.create<AddIOp>(
               loc, builder.create<MulIOp>(loc, blockId, blockDim), threadId),
           builder.create<MulIOp>(loc, blockDim, gridDim)};
 }

 /// Builds the processor id/count pairs for GPU dimensions 0 .. `numDims` - 1.
 ///
 /// The ops are created in increasing dimension order, but the returned vector
 /// is filled back to front: the pair for dimension 0 lands in the last slot
 /// and the pair for dimension `numDims` - 1 in slot 0.
 template <typename GPUIdOp, typename GPUCountOp>
 static SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCountsImpl(
     OpBuilder &builder, Location loc, unsigned numDims) {
   SmallVector<linalg::ProcInfo, 2> reversedInfo(numDims);
   unsigned slot = numDims;
   for (unsigned dim = 0; dim != numDims; ++dim) {
     linalg::ProcInfo info =
         getGPUProcessorIdAndCountImpl<GPUIdOp, GPUCountOp>(builder, loc, dim);
     --slot;
     reversedInfo[slot] = info;
   }
   return reversedInfo;
 }

 /// Returns the processor id/count pairs for the first `numDims` GPU
 /// dimensions, as produced by getGPUProcessorIdsAndCountsImpl for the same
 /// `GPUIdOp`/`GPUCountOp` pair.
 template <typename GPUIdOp, typename GPUCountOp>
 SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCounts(OpBuilder &builder,
                                                              Location loc,
                                                              unsigned numDims) {
   SmallVector<linalg::ProcInfo, 2> idsAndCounts =
       getGPUProcessorIdsAndCountsImpl<GPUIdOp, GPUCountOp>(builder, loc,
                                                            numDims);
   return idsAndCounts;
 }

 /// Explicit instantiations of getGPUProcessorIdsAndCounts for three id/count
 /// pairs: (gpu::BlockIdOp, gpu::GridDimOp), (gpu::ThreadIdOp, gpu::BlockDimOp)
 /// and (GPUGlobalId, GPUGlobalCount).
 /// NOTE(review): presumably needed because the template body lives in this
 /// source file rather than in the header - confirm against Utils.h.
 template SmallVector<linalg::ProcInfo, 2>
 getGPUProcessorIdsAndCounts<gpu::BlockIdOp, gpu::GridDimOp>(OpBuilder &builder,
                                                             Location loc,
                                                             unsigned numDims);
 template SmallVector<linalg::ProcInfo, 2>
 getGPUProcessorIdsAndCounts<gpu::ThreadIdOp, gpu::BlockDimOp>(
     OpBuilder &builder, Location loc, unsigned numDims);
 template SmallVector<linalg::ProcInfo, 2>
 getGPUProcessorIdsAndCounts<GPUGlobalId, GPUGlobalCount>(OpBuilder &builder,
                                                          Location loc,
                                                          unsigned numDims);
 }  // namespace iree_compiler
 }  // namespace mlir
	// Copyright 2020 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	//===- Utils.cpp - Utility functions used in Linalg to SPIR-V lowering ----===//
	//
	// Implementation of utility functions used while lowering from Linalg to SPIRV.
	//
	//===----------------------------------------------------------------------===//

	#include "iree/compiler/Conversion/LinalgToSPIRV/Utils.h"

	#include "iree/compiler/Conversion/CodegenUtils/MarkerUtils.h"
	#include "iree/compiler/Conversion/LinalgToSPIRV/MemorySpace.h"
	#include "mlir/Dialect/GPU/GPUDialect.h"
	#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
	#include "mlir/Dialect/Linalg/Utils/Utils.h"
	#include "mlir/Dialect/SPIRV/TargetAndABI.h"
	#include "mlir/IR/Function.h"
	#include "mlir/IR/Identifier.h"
	#include "mlir/IR/Matchers.h"
	#include "mlir/IR/Region.h"
	#include "mlir/Support/LogicalResult.h"

	namespace mlir {
	namespace iree_compiler {

	/// Records `workGroupSize` on `funcOp` as its SPIR-V entry point ABI attribute.
	///
	/// Emits an error on `funcOp` and returns it when the function body does not
	/// consist of exactly one block, or when `workGroupSize` does not have exactly
	/// three entries. Each entry is converted to `int32_t` before being handed to
	/// the attribute builder.
	LogicalResult updateWorkGroupSize(FuncOp funcOp,
	                                  ArrayRef<int64_t> workGroupSize) {
	  // Only single-block dispatch functions are handled.
	  if (!llvm::hasSingleElement(funcOp.getBody()))
	    return funcOp.emitError("unhandled dispatch function with multiple blocks");

	  if (workGroupSize.size() != 3)
	    return funcOp.emitError("expected workgroup size to have three entries");

	  // Narrow every extent to the 32-bit element type passed to the ABI helper.
	  SmallVector<int32_t, 3> localSize;
	  for (int64_t extent : workGroupSize)
	    localSize.push_back(static_cast<int32_t>(extent));

	  funcOp.setAttr(spirv::getEntryPointABIAttrName(),
	                 spirv::getEntryPointABIAttr(localSize, funcOp.getContext()));
	  return success();
	}

	/// Creates a `linalg::CopyOp` from `src` to `dst` at the location of `src` and
	/// tags it with the marker returned by getCopyToWorkgroupMemoryMarker().
	/// Always returns success.
	LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst) {
	  Location copyLoc = src.getLoc();
	  linalg::CopyOp workgroupCopy = b.create<linalg::CopyOp>(copyLoc, src, dst);
	  setMarker(workgroupCopy, getCopyToWorkgroupMemoryMarker());
	  return success();
	}

	/// Allocates a buffer in workgroup memory shaped by `boundingSubViewSize`.
	///
	/// Every entry of `boundingSubViewSize` must match a constant integer; its
	/// sign-extended value becomes the corresponding dimension of the allocation.
	/// If any entry is not a constant, or is the constant -1, no op is created and
	/// an empty Optional is returned. Otherwise an `AllocOp` is created at the
	/// location of `subview`, with the element type of `subview` and the memory
	/// space given by getWorkgroupMemorySpace(), and its result is returned.
	///
	/// `folder` is not used.
	Optional<Value> allocateWorkgroupMemory(OpBuilder &b, SubViewOp subview,
	                                        ArrayRef<Value> boundingSubViewSize,
	                                        OperationFolder *folder) {
	  // Collect the static extents, bailing out on the first one that cannot be
	  // used as an allocation dimension.
	  SmallVector<int64_t, 2> staticExtents;
	  for (Value extent : boundingSubViewSize) {
	    APInt constant;
	    if (!matchPattern(extent, m_ConstantInt(&constant))) return {};
	    int64_t size = constant.getSExtValue();
	    if (size == -1) return {};
	    staticExtents.push_back(size);
	  }

	  Type elementType = subview.getType().getElementType();
	  MemRefType workgroupType = MemRefType::get(staticExtents, elementType, {},
	                                             getWorkgroupMemorySpace());
	  Value workgroupBuffer = b.create<AllocOp>(subview.getLoc(), workgroupType);
	  return workgroupBuffer;
	}

	/// Creates a `DeallocOp` for `buffer`, located at the `AllocOp` that defines it.
	///
	/// Returns failure without creating anything when `buffer` is not the result of
	/// an `AllocOp` (for example a block argument), since there is then no
	/// allocation to take the location from. Returns success otherwise.
	LogicalResult deallocateWorkgroupMemory(OpBuilder &b, Value buffer) {
	  auto allocOp = buffer.getDefiningOp<AllocOp>();
	  // getDefiningOp yields a null op when the value has a different producer;
	  // calling getLoc() on it would dereference null.
	  if (!allocOp) return failure();
	  b.create<DeallocOp>(allocOp.getLoc(), buffer);
	  return success();
	}

	/// Builds the processor id and processor count for GPU dimension `dim` by
	/// creating a `GPUIdOp` followed by a `GPUCountOp`, both of index type, at
	/// `loc`.
	///
	/// `dim` selects the dimension attribute from the {"x", "y", "z"} table. A
	/// `dim` beyond the last table entry is clamped to the last entry.
	template <typename GPUIdOp, typename GPUCountOp>
	static linalg::ProcInfo getGPUProcessorIdAndCountImpl(OpBuilder &builder,
	                                                      Location loc,
	                                                      unsigned dim) {
	  std::array<StringRef, kNumGPUDims> dimAttr{"x", "y", "z"};
	  // Clamp to the last valid index. Clamping to kNumGPUDims itself would read
	  // one element past the end of `dimAttr`.
	  StringAttr attr = builder.getStringAttr(
	      dimAttr[std::min<unsigned>(dim, kNumGPUDims - 1)]);
	  Type indexType = builder.getIndexType();
	  return {builder.create<GPUIdOp>(loc, indexType, attr),
	          builder.create<GPUCountOp>(loc, indexType, attr)};
	}

	/// Specialization for (GPUGlobalId, GPUGlobalCount): builds the global id and
	/// global count along GPU dimension `dim` from the gpu dialect ops.
	///
	/// The id is computed as `blockId * blockDim + threadId` and the count as
	/// `blockDim * gridDim`, all of index type and created at `loc`.
	///
	/// `dim` selects the dimension attribute from the {"x", "y", "z"} table. A
	/// `dim` beyond the last table entry is clamped to the last entry.
	template <>
	linalg::ProcInfo getGPUProcessorIdAndCountImpl<GPUGlobalId, GPUGlobalCount>(
	    OpBuilder &builder, Location loc, unsigned dim) {
	  std::array<StringRef, kNumGPUDims> dimAttr{"x", "y", "z"};
	  // Clamp to the last valid index. Clamping to kNumGPUDims itself would read
	  // one element past the end of `dimAttr`.
	  StringAttr attr = builder.getStringAttr(
	      dimAttr[std::min<unsigned>(dim, kNumGPUDims - 1)]);
	  Type indexType = builder.getIndexType();
	  Value gridDim = builder.create<gpu::GridDimOp>(loc, indexType, attr);
	  Value blockId = builder.create<gpu::BlockIdOp>(loc, indexType, attr);
	  Value blockDim = builder.create<gpu::BlockDimOp>(loc, indexType, attr);
	  Value threadId = builder.create<gpu::ThreadIdOp>(loc, indexType, attr);
	  // TODO(ravishankarm): Using affine_maps here would be beneficial, and we can
	  // do this because the blockDim is constant. But this would lead to an
	  // ordering issue cause it assumes that the workgroup size has already been
	  // set. If using affine_map can help, make sure that the workgroup size is set
	  // before.
	  return {builder.create<AddIOp>(
	              loc, builder.create<MulIOp>(loc, blockId, blockDim), threadId),
	          builder.create<MulIOp>(loc, blockDim, gridDim)};
	}

	/// Builds the processor id/count pairs for GPU dimensions 0 .. `numDims` - 1.
	///
	/// The ops are created in increasing dimension order, but the returned vector
	/// is filled back to front: the pair for dimension 0 lands in the last slot
	/// and the pair for dimension `numDims` - 1 in slot 0.
	template <typename GPUIdOp, typename GPUCountOp>
	static SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCountsImpl(
	    OpBuilder &builder, Location loc, unsigned numDims) {
	  SmallVector<linalg::ProcInfo, 2> reversedInfo(numDims);
	  unsigned slot = numDims;
	  for (unsigned dim = 0; dim != numDims; ++dim) {
	    linalg::ProcInfo info =
	        getGPUProcessorIdAndCountImpl<GPUIdOp, GPUCountOp>(builder, loc, dim);
	    --slot;
	    reversedInfo[slot] = info;
	  }
	  return reversedInfo;
	}

	/// Returns the processor id/count pairs for the first `numDims` GPU
	/// dimensions, as produced by getGPUProcessorIdsAndCountsImpl for the same
	/// `GPUIdOp`/`GPUCountOp` pair.
	template <typename GPUIdOp, typename GPUCountOp>
	SmallVector<linalg::ProcInfo, 2> getGPUProcessorIdsAndCounts(OpBuilder &builder,
	                                                             Location loc,
	                                                             unsigned numDims) {
	  SmallVector<linalg::ProcInfo, 2> idsAndCounts =
	      getGPUProcessorIdsAndCountsImpl<GPUIdOp, GPUCountOp>(builder, loc,
	                                                           numDims);
	  return idsAndCounts;
	}

	/// Explicit instantiations of getGPUProcessorIdsAndCounts for three id/count
	/// pairs: (gpu::BlockIdOp, gpu::GridDimOp), (gpu::ThreadIdOp, gpu::BlockDimOp)
	/// and (GPUGlobalId, GPUGlobalCount).
	/// NOTE(review): presumably needed because the template body lives in this
	/// source file rather than in the header - confirm against Utils.h.
	template SmallVector<linalg::ProcInfo, 2>
	getGPUProcessorIdsAndCounts<gpu::BlockIdOp, gpu::GridDimOp>(OpBuilder &builder,
	Location loc,
	unsigned numDims);
	template SmallVector<linalg::ProcInfo, 2>
	getGPUProcessorIdsAndCounts<gpu::ThreadIdOp, gpu::BlockDimOp>(
	OpBuilder &builder, Location loc, unsigned numDims);
	template SmallVector<linalg::ProcInfo, 2>
	getGPUProcessorIdsAndCounts<GPUGlobalId, GPUGlobalCount>(OpBuilder &builder,
	Location loc,
	unsigned numDims);
	} // namespace iree_compiler
	} // namespace mlir