# samples/custom_dispatch/cuda/kernels/CMakeLists.txt - 3p/openxla/iree - Git at Google

 # Copyright 2022 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 # This sample requires both IREE_TARGET_BACKEND_CUDA and IREE_HAL_DRIVER_CUDA
 # to be enabled; stop processing this directory when either one is off.
 if(NOT (IREE_TARGET_BACKEND_CUDA AND IREE_HAL_DRIVER_CUDA))
   return()
 endif()

 # NOTE: this is not how one should actually build their PTX files. Do not use
 # this as an authoritative source for compilation settings or CMake goo. If you
 # choose to go the route of custom CUDA kernels you must bring your own build
 # infrastructure. This sample only demonstrates how to use compiled PTX blobs
 # inside of the IREE compiler and this is the minimum amount of hacking that
 # could be done to do that.

 # Selects the toolchain used to build the PTX blobs. This is currently ON, so
 # the NVCC route below is taken and the clang route is unused.
 # NOTE(review): this comment previously stated that clang is the default (to
 # allow for consistent cross-compiling), which disagreed with the value set
 # here. Confirm which default is intended - the two routes should be
 # effectively the same for our purposes (device only code to LTO-IR/PTX).
 set(_BUILD_WITH_NVCC ON)

 # The NVCC route relies on CMake's CUDA language support: probe for a CUDA
 # compiler and skip the rest of this directory (with a status message) when
 # none is found; otherwise enable the language.
 if(_BUILD_WITH_NVCC)
   include(CheckLanguage)
   check_language(CUDA)
   if(NOT CMAKE_CUDA_COMPILER)
     message(STATUS "IREE custom_dispatch/cuda/kernels ignored -- nvcc not found")
     return()
   endif()
   enable_language(CUDA)
 endif()

 # Builds a PTX blob using cmake + nvcc from the CUDA SDK.
 #
 # Arguments:
 #   _ARCH: CUDA architecture number without the "sm_" prefix (e.g. 52 or 80).
 #
 # Creates:
 #   - OBJECT library `<name>_obj` compiling kernels.cu with
 #     CUDA_PTX_COMPILATION enabled for the requested architecture.
 #   - A custom command copying the generated PTX to
 #     ${CMAKE_CURRENT_BINARY_DIR}/kernels_sm_<_ARCH>.ptx.
 #   - Custom target `<name>` wrapping that file, registered as a dependency of
 #     `iree-sample-deps`.
 # where <name> is iree_samples_custom_dispatch_cuda_kernels_ptx_<_ARCH>.
 function(cuda_kernel_ptx_nvcc _ARCH)
   set(_NAME iree_samples_custom_dispatch_cuda_kernels_ptx_${_ARCH})
   set(_PTX_SRC_NAME "kernels.cu")
   # Keep the basename function-local; storing it with CACHE would leak a
   # private helper variable into the global CMake cache.
   get_filename_component(_PTX_SRC_BASENAME "${_PTX_SRC_NAME}" NAME_WE)
   set(_PTX_OBJ_NAME "${_PTX_SRC_BASENAME}_sm_${_ARCH}")
   # Absolute path of the final blob; shared by the copy rule and the target so
   # both refer to exactly the same file.
   set(_PTX_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_PTX_OBJ_NAME}.ptx")

   add_library(${_NAME}_obj OBJECT)
   target_sources(${_NAME}_obj PRIVATE ${_PTX_SRC_NAME})
   set_source_files_properties(${_PTX_SRC_NAME} PROPERTIES LANGUAGE CUDA)
   set_target_properties(${_NAME}_obj PROPERTIES
     LANGUAGE CUDA
     LINKER_LANGUAGE CUDA
     CUDA_PTX_COMPILATION ON
     CUDA_SEPARABLE_COMPILATION ON
     CUDA_RESOLVE_DEVICE_SYMBOLS ON
     CUDA_ARCHITECTURES "${_ARCH}"
   )

   # This makes my eyes bleed. There is probably a much better way of doing this
   # and I wish the best of luck to those who try. From:
   # https://sourcegraph.com/github.com/NVIDIA/MDL-SDK/-/blob/cmake/utilities.cmake?L1266
   # This sample should probably just invoke nvcc directly.
   #
   # The path of the intermediate PTX file is reconstructed by hand: with a
   # multi-config generator it is looked up under `<target>.dir/<CONFIG>`,
   # otherwise under `CMakeFiles/<target>.dir`.
   get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
   if(_GENERATOR_IS_MULTI_CONFIG)
     set(_PTX_CONFIG_FOLDER /$<CONFIG>)
     set(_CMAKEFILES_FOLDER "")
   else()
     set(_PTX_CONFIG_FOLDER "")
     set(_CMAKEFILES_FOLDER /CMakeFiles)
   endif()
   # VERBATIM keeps argument escaping identical across platforms/generators,
   # matching the clang variant of this function.
   add_custom_command(
     OUTPUT "${_PTX_OUTPUT}"
     DEPENDS $<TARGET_OBJECTS:${_NAME}_obj>
     COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
     COMMAND ${CMAKE_COMMAND} -E copy_if_different
         "${CMAKE_CURRENT_BINARY_DIR}${_CMAKEFILES_FOLDER}/${_NAME}_obj.dir${_PTX_CONFIG_FOLDER}/${_PTX_SRC_BASENAME}.ptx"
         "${_PTX_OUTPUT}"
     VERBATIM
   )
   add_custom_target(${_NAME} DEPENDS
     "${_PTX_OUTPUT}"
   )
   add_dependencies(iree-sample-deps "${_NAME}")
 endfunction()

 # Builds a PTX blob using the clang built by IREE from tip-of-tree LLVM.
 #
 # Arguments:
 #   _ARCH: CUDA architecture number without the "sm_" prefix (e.g. 52 or 80).
 #
 # Runs ${IREE_CLANG_BINARY} in CUDA device-only mode on kernels.cu and writes
 # ${CMAKE_CURRENT_BINARY_DIR}/kernels_sm_<_ARCH>.ptx. The custom target
 # iree_samples_custom_dispatch_cuda_kernels_ptx_<_ARCH> wraps that file and is
 # registered as a dependency of `iree-sample-deps`.
 function(cuda_kernel_ptx_clang _ARCH)
   set(_NAME iree_samples_custom_dispatch_cuda_kernels_ptx_${_ARCH})
   set(_PTX_SRC_NAME "kernels.cu")
   # Keep the basename function-local; storing it with CACHE would leak a
   # private helper variable into the global CMake cache.
   get_filename_component(_PTX_SRC_BASENAME "${_PTX_SRC_NAME}" NAME_WE)
   set(_PTX_OBJ_NAME "${_PTX_SRC_BASENAME}_sm_${_ARCH}.ptx")
   # Absolute paths so the dependency and output do not rely on CMake's
   # relative-path lookup rules for custom commands.
   set(_PTX_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${_PTX_SRC_NAME}")
   set(_PTX_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_PTX_OBJ_NAME}")
   add_custom_command(
     OUTPUT
       "${_PTX_OUTPUT}"
     DEPENDS
       "${_PTX_SRC_PATH}"
       ${IREE_CLANG_TARGET}
     COMMAND ${IREE_CLANG_BINARY}
       -x cuda
       -Wno-unknown-cuda-version
       "--cuda-path=${CUDAToolkit_ROOT}"
       --cuda-device-only
       --cuda-gpu-arch=sm_${_ARCH}
       -O2
       -S
       "${_PTX_SRC_PATH}"
       -o "${_PTX_OUTPUT}"
     VERBATIM
   )
   add_custom_target(${_NAME} DEPENDS
     "${_PTX_OUTPUT}"
   )
   add_dependencies(iree-sample-deps "${_NAME}")
 endfunction()

 # Emit one kernels_sm_<arch>.ptx per targeted architecture, using whichever
 # toolchain _BUILD_WITH_NVCC selects.
 foreach(_KERNEL_ARCH 52 80)
   if(_BUILD_WITH_NVCC)
     cuda_kernel_ptx_nvcc(${_KERNEL_ARCH})
   else()
     cuda_kernel_ptx_clang(${_KERNEL_ARCH})
   endif()
 endforeach()

 # Registers the lit test suite `example` over example.mlir, listing FileCheck,
 # iree-compile and iree-run-module as the tools it uses.
 # NOTE(review): iree_lit_test_suite is defined elsewhere in the IREE build;
 # the labels presumably restrict where the test runs (CUDA driver, host-only)
 # - confirm against the macro.
 iree_lit_test_suite(
   NAME
     example
   SRCS
     "example.mlir"
   TOOLS
     FileCheck
     iree-compile
     iree-run-module
   LABELS
     "driver=cuda"
     "hostonly"
 )

 # Declares the `cuda_ukernel` bitcode library built from ukernel.cu for
 # CUDA_ARCH sm_60.
 # NOTE(review): presumably this produces cuda_ukernel.bc, the file passed via
 # --iree-link-bitcode to the check test suite below - confirm against
 # iree_cuda_bitcode_library, which is defined elsewhere.
 iree_cuda_bitcode_library(
   NAME
     cuda_ukernel
   CUDA_ARCH
     sm_60
   SRCS
     "ukernel.cu"
 )

 # Registers the check test suite `check_cuda_ukernel` over
 # ukernel_example.mlir with the "cuda" target backend and "cuda" driver. The
 # compiler is given --iree-link-bitcode=cuda_ukernel.bc and the suite depends
 # on the ::cuda_ukernel library declared above.
 # NOTE(review): the labels appear to exclude sanitizer configurations and to
 # require an NVIDIA GPU - confirm against the macro, which is defined
 # elsewhere.
 iree_check_single_backend_test_suite(
   NAME
     check_cuda_ukernel
   SRCS
     "ukernel_example.mlir"
   TARGET_BACKEND
     "cuda"
   COMPILER_FLAGS
     "--iree-link-bitcode=cuda_ukernel.bc"
   DRIVER
     "cuda"
   LABELS
     "noasan"
     "nomsan"
     "notsan"
     "noubsan"
     "requires-gpu-nvidia"
   DEPENDS
     ::cuda_ukernel
 )
	# Copyright 2022 The IREE Authors
	#
	# Licensed under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

	# This sample requires both IREE_TARGET_BACKEND_CUDA and IREE_HAL_DRIVER_CUDA
	# to be enabled; stop processing this directory when either one is off.
	if(NOT (IREE_TARGET_BACKEND_CUDA AND IREE_HAL_DRIVER_CUDA))
	  return()
	endif()

	# NOTE: this is not how one should actually build their PTX files. Do not use
	# this as an authoritative source for compilation settings or CMake goo. If you
	# choose to go the route of custom CUDA kernels you must bring your own build
	# infrastructure. This sample only demonstrates how to use compiled PTX blobs
	# inside of the IREE compiler and this is the minimum amount of hacking that
	# could be done to do that.

	# Selects the toolchain used to build the PTX blobs. This is currently ON, so
	# the NVCC route below is taken and the clang route is unused.
	# NOTE(review): this comment previously stated that clang is the default (to
	# allow for consistent cross-compiling), which disagreed with the value set
	# here. Confirm which default is intended - the two routes should be
	# effectively the same for our purposes (device only code to LTO-IR/PTX).
	set(_BUILD_WITH_NVCC ON)

	# The NVCC route relies on CMake's CUDA language support: probe for a CUDA
	# compiler and skip the rest of this directory (with a status message) when
	# none is found; otherwise enable the language.
	if(_BUILD_WITH_NVCC)
	include(CheckLanguage)
	check_language(CUDA)
	if(NOT CMAKE_CUDA_COMPILER)
	message(STATUS "IREE custom_dispatch/cuda/kernels ignored -- nvcc not found")
	return()
	endif()
	enable_language(CUDA)
	endif()

	# Builds a PTX blob using cmake + nvcc from the CUDA SDK.
	#
	# Arguments:
	#   _ARCH: CUDA architecture number without the "sm_" prefix (e.g. 52 or 80).
	#
	# Creates:
	#   - OBJECT library `<name>_obj` compiling kernels.cu with
	#     CUDA_PTX_COMPILATION enabled for the requested architecture.
	#   - A custom command copying the generated PTX to
	#     ${CMAKE_CURRENT_BINARY_DIR}/kernels_sm_<_ARCH>.ptx.
	#   - Custom target `<name>` wrapping that file, registered as a dependency of
	#     `iree-sample-deps`.
	# where <name> is iree_samples_custom_dispatch_cuda_kernels_ptx_<_ARCH>.
	function(cuda_kernel_ptx_nvcc _ARCH)
	  set(_NAME iree_samples_custom_dispatch_cuda_kernels_ptx_${_ARCH})
	  set(_PTX_SRC_NAME "kernels.cu")
	  # Keep the basename function-local; storing it with CACHE would leak a
	  # private helper variable into the global CMake cache.
	  get_filename_component(_PTX_SRC_BASENAME "${_PTX_SRC_NAME}" NAME_WE)
	  set(_PTX_OBJ_NAME "${_PTX_SRC_BASENAME}_sm_${_ARCH}")
	  # Absolute path of the final blob; shared by the copy rule and the target so
	  # both refer to exactly the same file.
	  set(_PTX_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_PTX_OBJ_NAME}.ptx")

	  add_library(${_NAME}_obj OBJECT)
	  target_sources(${_NAME}_obj PRIVATE ${_PTX_SRC_NAME})
	  set_source_files_properties(${_PTX_SRC_NAME} PROPERTIES LANGUAGE CUDA)
	  set_target_properties(${_NAME}_obj PROPERTIES
	    LANGUAGE CUDA
	    LINKER_LANGUAGE CUDA
	    CUDA_PTX_COMPILATION ON
	    CUDA_SEPARABLE_COMPILATION ON
	    CUDA_RESOLVE_DEVICE_SYMBOLS ON
	    CUDA_ARCHITECTURES "${_ARCH}"
	  )

	  # This makes my eyes bleed. There is probably a much better way of doing this
	  # and I wish the best of luck to those who try. From:
	  # https://sourcegraph.com/github.com/NVIDIA/MDL-SDK/-/blob/cmake/utilities.cmake?L1266
	  # This sample should probably just invoke nvcc directly.
	  #
	  # The path of the intermediate PTX file is reconstructed by hand: with a
	  # multi-config generator it is looked up under `<target>.dir/<CONFIG>`,
	  # otherwise under `CMakeFiles/<target>.dir`.
	  get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
	  if(_GENERATOR_IS_MULTI_CONFIG)
	    set(_PTX_CONFIG_FOLDER /$<CONFIG>)
	    set(_CMAKEFILES_FOLDER "")
	  else()
	    set(_PTX_CONFIG_FOLDER "")
	    set(_CMAKEFILES_FOLDER /CMakeFiles)
	  endif()
	  # VERBATIM keeps argument escaping identical across platforms/generators,
	  # matching the clang variant of this function.
	  add_custom_command(
	    OUTPUT "${_PTX_OUTPUT}"
	    DEPENDS $<TARGET_OBJECTS:${_NAME}_obj>
	    COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
	    COMMAND ${CMAKE_COMMAND} -E copy_if_different
	        "${CMAKE_CURRENT_BINARY_DIR}${_CMAKEFILES_FOLDER}/${_NAME}_obj.dir${_PTX_CONFIG_FOLDER}/${_PTX_SRC_BASENAME}.ptx"
	        "${_PTX_OUTPUT}"
	    VERBATIM
	  )
	  add_custom_target(${_NAME} DEPENDS
	    "${_PTX_OUTPUT}"
	  )
	  add_dependencies(iree-sample-deps "${_NAME}")
	endfunction()

	# Builds a PTX blob using the clang built by IREE from tip-of-tree LLVM.
	#
	# Arguments:
	#   _ARCH: CUDA architecture number without the "sm_" prefix (e.g. 52 or 80).
	#
	# Runs ${IREE_CLANG_BINARY} in CUDA device-only mode on kernels.cu and writes
	# ${CMAKE_CURRENT_BINARY_DIR}/kernels_sm_<_ARCH>.ptx. The custom target
	# iree_samples_custom_dispatch_cuda_kernels_ptx_<_ARCH> wraps that file and is
	# registered as a dependency of `iree-sample-deps`.
	function(cuda_kernel_ptx_clang _ARCH)
	  set(_NAME iree_samples_custom_dispatch_cuda_kernels_ptx_${_ARCH})
	  set(_PTX_SRC_NAME "kernels.cu")
	  # Keep the basename function-local; storing it with CACHE would leak a
	  # private helper variable into the global CMake cache.
	  get_filename_component(_PTX_SRC_BASENAME "${_PTX_SRC_NAME}" NAME_WE)
	  set(_PTX_OBJ_NAME "${_PTX_SRC_BASENAME}_sm_${_ARCH}.ptx")
	  # Absolute paths so the dependency and output do not rely on CMake's
	  # relative-path lookup rules for custom commands.
	  set(_PTX_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${_PTX_SRC_NAME}")
	  set(_PTX_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_PTX_OBJ_NAME}")
	  add_custom_command(
	    OUTPUT
	      "${_PTX_OUTPUT}"
	    DEPENDS
	      "${_PTX_SRC_PATH}"
	      ${IREE_CLANG_TARGET}
	    COMMAND ${IREE_CLANG_BINARY}
	      -x cuda
	      -Wno-unknown-cuda-version
	      "--cuda-path=${CUDAToolkit_ROOT}"
	      --cuda-device-only
	      --cuda-gpu-arch=sm_${_ARCH}
	      -O2
	      -S
	      "${_PTX_SRC_PATH}"
	      -o "${_PTX_OUTPUT}"
	    VERBATIM
	  )
	  add_custom_target(${_NAME} DEPENDS
	    "${_PTX_OUTPUT}"
	  )
	  add_dependencies(iree-sample-deps "${_NAME}")
	endfunction()

	# Emit one kernels_sm_<arch>.ptx per targeted architecture, using whichever
	# toolchain _BUILD_WITH_NVCC selects.
	foreach(_KERNEL_ARCH 52 80)
	  if(_BUILD_WITH_NVCC)
	    cuda_kernel_ptx_nvcc(${_KERNEL_ARCH})
	  else()
	    cuda_kernel_ptx_clang(${_KERNEL_ARCH})
	  endif()
	endforeach()

	# Registers the lit test suite `example` over example.mlir, listing FileCheck,
	# iree-compile and iree-run-module as the tools it uses.
	# NOTE(review): iree_lit_test_suite is defined elsewhere in the IREE build;
	# the labels presumably restrict where the test runs (CUDA driver, host-only)
	# - confirm against the macro.
	iree_lit_test_suite(
	NAME
	example
	SRCS
	"example.mlir"
	TOOLS
	FileCheck
	iree-compile
	iree-run-module
	LABELS
	"driver=cuda"
	"hostonly"
	)

	# Declares the `cuda_ukernel` bitcode library built from ukernel.cu for
	# CUDA_ARCH sm_60.
	# NOTE(review): presumably this produces cuda_ukernel.bc, the file passed via
	# --iree-link-bitcode to the check test suite below - confirm against
	# iree_cuda_bitcode_library, which is defined elsewhere.
	iree_cuda_bitcode_library(
	NAME
	cuda_ukernel
	CUDA_ARCH
	sm_60
	SRCS
	"ukernel.cu"
	)

	# Registers the check test suite `check_cuda_ukernel` over
	# ukernel_example.mlir with the "cuda" target backend and "cuda" driver. The
	# compiler is given --iree-link-bitcode=cuda_ukernel.bc and the suite depends
	# on the ::cuda_ukernel library declared above.
	# NOTE(review): the labels appear to exclude sanitizer configurations and to
	# require an NVIDIA GPU - confirm against the macro, which is defined
	# elsewhere.
	iree_check_single_backend_test_suite(
	NAME
	check_cuda_ukernel
	SRCS
	"ukernel_example.mlir"
	TARGET_BACKEND
	"cuda"
	COMPILER_FLAGS
	"--iree-link-bitcode=cuda_ukernel.bc"
	DRIVER
	"cuda"
	LABELS
	"noasan"
	"nomsan"
	"notsan"
	"noubsan"
	"requires-gpu-nvidia"
	DEPENDS
	::cuda_ukernel
	)